/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.mem

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import utils._
import utility._
import xiangshan._
import xiangshan.cache._
import xiangshan.cache.{DCacheWordIO, DCacheLineIO, MemoryOpConstants}
import xiangshan.mem._
import xiangshan.backend.rob.RobPtr


// Data module define
// These data modules are like SyncDataModuleTemplate, but support cam-like ops

/**
  * Store-queue address storage with a content-addressed (CAM) match port.
  *
  * Every entry holds an address, a byte mask and a "line" flag. The module offers
  * registered-address read ports, unconditional write ports and `numForward` CAM
  * ports that compare a query address against all entries in parallel.
  *
  * @param dataWidth  width in bits of a stored address
  * @param numEntries number of entries
  * @param numRead    number of read ports
  * @param numWrite   number of write ports
  * @param numForward number of CAM (forward) query ports
  */
class SQAddrModule(dataWidth: Int, numEntries: Int, numRead: Int, numWrite: Int, numForward: Int)(implicit p: Parameters) extends XSModule with HasDCacheParameters {
  val io = IO(new Bundle {
    // sync read
    val raddr = Input(Vec(numRead, UInt(log2Up(numEntries).W)))
    val rdata = Output(Vec(numRead, UInt(dataWidth.W))) // rdata: store addr
    val rlineflag = Output(Vec(numRead, Bool())) // rdata: line op flag
    // write
    val wen   = Input(Vec(numWrite, Bool()))
    val waddr = Input(Vec(numWrite, UInt(log2Up(numEntries).W)))
    val wdata = Input(Vec(numWrite, UInt(dataWidth.W))) // wdata: store addr
    val wmask = Input(Vec(numWrite, UInt((VLEN/8).W)))
    val wlineflag = Input(Vec(numWrite, Bool())) // wdata: line op flag
    // forward addr cam
    val forwardMdata = Input(Vec(numForward, UInt(dataWidth.W))) // addr
    val forwardDataMask = Input(Vec(numForward, UInt((VLEN/8).W))) // forward mask
    val forwardMmask = Output(Vec(numForward, Vec(numEntries, Bool()))) // cam result mask
    // debug
    val debug_data = Output(Vec(numEntries, UInt(dataWidth.W)))
  })

  val data = Reg(Vec(numEntries, UInt(dataWidth.W)))
  val mask = Reg(Vec(numEntries, UInt((VLEN/8).W)))
  val lineflag = Reg(Vec(numEntries, Bool())) // cache line match flag
  // if lineflag == true, this address points to a whole cacheline
  io.debug_data := data

  // read ports
  // The read address goes through GatedRegNext once per port; the address and the
  // line flag of one port are always read from the same entry, so they share it.
  for (i <- 0 until numRead) {
    val raddrReg = GatedRegNext(io.raddr(i))
    io.rdata(i) := data(raddrReg)
    io.rlineflag(i) := lineflag(raddrReg)
  }

  // below is the write ports (with priorities)
  // Chisel's last-connect rule makes a higher-numbered port win if two ports ever
  // targeted the same entry; the assertion at the bottom forbids that case.
  for (i <- 0 until numWrite) {
    when (io.wen(i)) {
      data(io.waddr(i)) := io.wdata(i)
      mask(io.waddr(i)) := io.wmask(i)
      lineflag(io.waddr(i)) := io.wlineflag(i)
    }
  }

  // content addressed match
  for (i <- 0 until numForward) {
    for (j <- 0 until numEntries) {
      // io.forwardMmask(i)(j) := io.forwardMdata(i)(dataWidth-1, 3) === data(j)(dataWidth-1, 3)
      // Address bits at and above DCacheLineOffset must always match.
      val linehit = io.forwardMdata(i)(dataWidth-1, DCacheLineOffset) === data(j)(dataWidth-1, DCacheLineOffset)
      // Bits [DCacheLineOffset-1, DCacheVWordOffset] must match as well and, when
      // StoreQueueForwardWithMask is set, the stored and queried byte masks must overlap.
      val hit128bit = (io.forwardMdata(i)(DCacheLineOffset-1, DCacheVWordOffset) === data(j)(DCacheLineOffset-1, DCacheVWordOffset)) &&
        (!StoreQueueForwardWithMask.B || (mask(j) & io.forwardDataMask(i)).orR)
      // An entry flagged as a whole-line op hits on the upper address bits alone.
      io.forwardMmask(i)(j) := linehit && (hit128bit || lineflag(j))
    }
  }

  // DataModuleTemplate should not be used when there're any write conflicts
  for (i <- 0 until numWrite) {
    for (j <- i+1 until numWrite) {
      assert(!(io.wen(i) && io.wen(j) && io.waddr(i) === io.waddr(j)))
    }
  }
}

/**
  * One byte lane of a store-queue data entry.
  *
  * `data` is a single byte, so its width is fixed at 8 bits rather than derived
  * from XLEN (XLEN/8 equals 8 only when XLEN is 64).
  */
class SQData8Entry(implicit p: Parameters) extends XSBundle {
  val valid = Bool() // this byte is valid
  val data = UInt(8.W) // the byte itself
}

/**
  * Storage for one byte lane of the store queue, with banked two-stage writes and
  * a store-to-load forwarding selector.
  *
  * Each entry is an [[SQData8Entry]] (a byte plus its valid bit). The byte and the
  * valid bit are written through two independent groups of write ports.
  *
  * @param numEntries number of entries
  * @param numRead    number of read ports
  * @param numWrite   number of write ports (for both the data and the mask group)
  * @param numForward number of forward query ports
  */
class SQData8Module(numEntries: Int, numRead: Int, numWrite: Int, numForward: Int)(implicit p: Parameters) extends XSModule
  with HasDCacheParameters
  with HasCircularQueuePtrHelper
{
  val io = IO(new Bundle() {
    // sync read port
    val raddr = Vec(numRead, Input(UInt(log2Up(numEntries).W)))
    val rdata = Vec(numRead, Output(new SQData8Entry))
    // data write port
    val data = new Bundle() {
      val wen   = Vec(numWrite, Input(Bool()))
      val waddr = Vec(numWrite, Input(UInt(log2Up(numEntries).W)))
      val wdata = Vec(numWrite, Input(UInt(8.W))) // one byte per write
    }
    // mask (data valid) write port
    val mask = new Bundle() {
      val wen   = Vec(numWrite, Input(Bool()))
      val waddr = Vec(numWrite, Input(UInt(log2Up(numEntries).W)))
      val wdata = Vec(numWrite, Input(Bool()))
    }

    // st-ld forward addr cam result input, used to select forward data
    val needForward = Input(Vec(numForward, Vec(2, UInt(numEntries.W))))
    // forward result valid bit generated in current cycle
    val forwardValidFast = Vec(numForward, Output(Bool()))
    // forward result generated in the next cycle (from needForward delayed by one cycle)
    val forwardValid = Vec(numForward, Output(Bool()))
    val forwardData = Vec(numForward, Output(UInt(8.W)))
  })

  io := DontCare

  val data = Reg(Vec(numEntries, new SQData8Entry))

  require(isPow2(StoreQueueNWriteBanks))
  require(StoreQueueNWriteBanks > 1)
  // low address bits select the bank
  def get_bank(in: UInt): UInt = in(log2Up(StoreQueueNWriteBanks) -1, 0)
  // remaining high address bits select the register inside the bank
  def get_bank_index(in: UInt): UInt = in >> log2Up(StoreQueueNWriteBanks)
  // inverse mapping: (index within bank, bank) -> flat entry index
  def get_vec_index(index: Int, bank: Int): Int = {
    (index << log2Up(StoreQueueNWriteBanks)) + bank
  }

  // Registers per bank, derived from this module's own size (rounded up so that a
  // size that is not a multiple of the bank count still covers every entry).
  private val numRegsPerBank = (numEntries + StoreQueueNWriteBanks - 1) / StoreQueueNWriteBanks

  /**
    * Builds the two-stage banked write path for one group of write ports.
    *
    * Stage 0 decodes the bank of every port; stage 1 holds the registered enable,
    * payload and in-bank index and updates the matching entry.
    *
    * @param tag    prefix used in the suggested signal names ("data" or "mask")
    * @param wen    write enables, one per port
    * @param waddr  flat entry addresses, one per port
    * @param wdata  payloads, one per port
    * @param update connects the registered payload to the selected entry field
    */
  private def bankedWrite[T <: Data](tag: String, wen: Vec[Bool], waddr: Vec[UInt], wdata: Vec[T])
                                    (update: (SQData8Entry, T) => Unit): Unit = {
    for (i <- 0 until numWrite) {
      val s0_wenVec = Wire(Vec(StoreQueueNWriteBanks, Bool()))
      for (bank <- 0 until StoreQueueNWriteBanks) {
        s0_wenVec(bank) := wen(i) && get_bank(waddr(i)) === bank.U
      }
      val s1_wenVec = GatedValidRegNext(s0_wenVec)

      for (bank <- 0 until StoreQueueNWriteBanks) {
        val s0_wen = s0_wenVec(bank)
        val s1_wen = s1_wenVec(bank)
        // payload and index are only captured when this bank is actually targeted
        val s1_wdata = RegEnable(wdata(i), s0_wen)
        val s1_waddr = RegEnable(get_bank_index(waddr(i)), s0_wen)
        for (index <- 0 until numRegsPerBank) {
          val entry = get_vec_index(index, bank)
          // skip slots that fall past the end when numEntries is not a bank multiple
          if (entry < numEntries) {
            when (s1_wen && s1_waddr === index.U) {
              update(data(entry), s1_wdata)
            }
          }
        }
        s0_wen.suggestName(tag + "_s0_wen_" + i + "_bank_" + bank)
        s1_wen.suggestName(tag + "_s1_wen_" + i + "_bank_" + bank)
        s1_wdata.suggestName(tag + "_s1_wdata_" + i + "_bank_" + bank)
        s1_waddr.suggestName(tag + "_s1_waddr_" + i + "_bank_" + bank)
      }
    }
  }

  // writeback to sq
  // store queue data write takes 2 cycles
  bankedWrite("data", io.data.wen, io.data.waddr, io.data.wdata) { (entry, value) =>
    entry.data := value
  }

  // valid-bit write, same two-cycle banked structure as the data write
  bankedWrite("mask", io.mask.wen, io.mask.waddr, io.mask.wdata) { (entry, value) =>
    entry.valid := value
  }

  // store queue dequeue read: the read address passes through GatedRegNext
  for (i <- 0 until numRead) {
    io.rdata(i) := data(GatedRegNext(io.raddr(i)))
  }

  // DataModuleTemplate should not be used when there're any write conflicts
  for (i <- 0 until numWrite) {
    for (j <- i+1 until numWrite) {
      assert(!(io.data.wen(i) && io.data.wen(j) && io.data.waddr(i) === io.data.waddr(j)))
    }
  }
  for (i <- 0 until numWrite) {
    for (j <- i+1 until numWrite) {
      assert(!(io.mask.wen(i) && io.mask.wen(j) && io.mask.waddr(i) === io.mask.waddr(j)))
    }
  }

  // forwarding
  // Compare ringBufferTail (deqPtr) and forward.sqIdx, we have two cases:
  // (1) if they have the same flag, we need to check range(tail, sqIdx)
  // (2) if they have different flags, we need to check range(tail, VirtualLoadQueueSize) and range(0, sqIdx)
  // Forward1: Mux(same_flag, range(tail, sqIdx), range(tail, VirtualLoadQueueSize))
  // Forward2: Mux(same_flag, 0.U,                range(0, sqIdx)                  )
  // i.e. forward1 is the target entries with the same flag bits and forward2 otherwise
  // NOTE(review): the bound is written as VirtualLoadQueueSize above; for a store queue
  // this is presumably the store queue size - confirm.
  // The two masks are computed by the caller and arrive as needForward(i)(0) / (1).

  // entry with larger index should have higher priority since its data is younger

  // Pairwise merge of forward candidates: valid bits are OR-ed and the data of the
  // right-hand operand is taken whenever that operand is valid.
  // NOTE(review): this gives larger indices priority only if ParallelOperation keeps
  // the input order - confirm against its definition.
  private def parallelFwd(xs: Seq[Data]): Data = {
    ParallelOperation(xs, (a: Data, b: Data) => {
      val l = a.asTypeOf(new FwdEntry)
      val r = b.asTypeOf(new FwdEntry)
      val res = Wire(new FwdEntry)
      res.validFast := l.validFast || r.validFast
      res.valid := l.valid || r.valid
      // res.valid := RegNext(res.validFast)
      res.data := Mux(r.valid, r.data, l.data)
      res
    })
  }

  for (i <- 0 until numForward) {
    // parallel fwd logic
    // candidates [0, numEntries) come from needForward(i)(0),
    // candidates [numEntries, 2 * numEntries) from needForward(i)(1)
    val matchResultVec = Wire(Vec(numEntries * 2, new FwdEntry))

    for (j <- 0 until numEntries) {
      val needCheck0 = io.needForward(i)(0)(j)
      val needCheck1 = io.needForward(i)(1)(j)
      val needCheck0Reg = RegNext(needCheck0)
      val needCheck1Reg = RegNext(needCheck1)

      matchResultVec(j).validFast := needCheck0 && data(j).valid
      matchResultVec(j).valid := needCheck0Reg && data(j).valid
      matchResultVec(j).data := data(j).data
      matchResultVec(numEntries + j).validFast := needCheck1 && data(j).valid
      matchResultVec(numEntries + j).valid := needCheck1Reg && data(j).valid
      matchResultVec(numEntries + j).data := data(j).data
    }

    val parallelFwdResult = parallelFwd(matchResultVec).asTypeOf(new FwdEntry)

    // validFast is generated the same cycle with query
    io.forwardValidFast(i) := parallelFwdResult.validFast
    // valid is generated 1 cycle after query request
    io.forwardValid(i) := parallelFwdResult.valid
    // data is generated 1 cycle after query request
    io.forwardData(i) := parallelFwdResult.data
  }
}

/** A full store-queue data entry as seen at the read port: per-byte valid mask plus VLEN bits of data. */
class SQDataEntry(implicit p: Parameters) extends XSBundle {
  val mask = UInt((VLEN/8).W)
  val data = UInt(VLEN.W)
}

/**
  * SQDataModule is a wrapper of SQData8Modules: one [[SQData8Module]] per byte of a
  * VLEN-bit entry. Writes, reads and forward queries are fanned out to every byte
  * lane and the per-lane results are gathered into vectors.
  *
  * @param numEntries number of entries
  * @param numRead    number of read ports
  * @param numWrite   number of write ports (for both the data and the mask group)
  * @param numForward number of forward query ports
  */
class SQDataModule(numEntries: Int, numRead: Int, numWrite: Int, numForward: Int)(implicit p: Parameters) extends XSModule with HasDCacheParameters with HasCircularQueuePtrHelper {
  val io = IO(new Bundle() {
    // sync read port
    val raddr = Vec(numRead, Input(UInt(log2Up(numEntries).W)))
    val rdata = Vec(numRead, Output(new SQDataEntry))
    // data write port
    val data = new Bundle() {
      val wen   = Vec(numWrite, Input(Bool()))
      val waddr = Vec(numWrite, Input(UInt(log2Up(numEntries).W)))
      val wdata = Vec(numWrite, Input(UInt(VLEN.W)))
    }
    // mask (data valid) write port
    val mask = new Bundle() {
      val wen   = Vec(numWrite, Input(Bool()))
      val waddr = Vec(numWrite, Input(UInt(log2Up(numEntries).W)))
      val wdata = Vec(numWrite, Input(UInt((VLEN/8).W)))
    }

    // st-ld forward addr cam result input, used to select forward data
    val needForward = Input(Vec(numForward, Vec(2, UInt(numEntries.W))))
    // forward result valid bit generated in current cycle
    val forwardMaskFast = Vec(numForward, Output(Vec((VLEN/8), Bool())))
    // forward result generated in the next cycle
    val forwardMask = Vec(numForward, Output(Vec((VLEN/8), Bool())))
    val forwardData = Vec(numForward, Output(Vec((VLEN/8), UInt(8.W))))
  })

  // One byte lane per byte of VLEN; this is 16 lanes when VLEN is 128, hence the
  // historical name `data16`, which is kept so instance names stay stable.
  private val numBytes = VLEN / 8
  val data16 = Seq.fill(numBytes)(Module(new SQData8Module(numEntries, numRead, numWrite, numForward)))

  // writeback to lq/sq
  for (i <- 0 until numWrite) {
    // write to every byte lane: lane j gets mask bit j and data bits [8j+7, 8j]
    for (j <- 0 until numBytes) {
      data16(j).io.mask.waddr(i) := io.mask.waddr(i)
      data16(j).io.mask.wdata(i) := io.mask.wdata(i)(j)
      data16(j).io.mask.wen(i)   := io.mask.wen(i)
      data16(j).io.data.waddr(i) := io.data.waddr(i)
      data16(j).io.data.wdata(i) := io.data.wdata(i)(8*(j+1)-1, 8*j)
      data16(j).io.data.wen(i)   := io.data.wen(i)
    }
  }

  // store queue dequeue read: gather the lanes, lane 0 in the least significant position
  for (i <- 0 until numRead) {
    data16.foreach(_.io.raddr(i) := io.raddr(i))
    io.rdata(i).mask := VecInit(data16.map(_.io.rdata(i).valid)).asUInt
    io.rdata(i).data := VecInit(data16.map(_.io.rdata(i).data)).asUInt
  }

  // DataModuleTemplate should not be used when there're any write conflicts
  for (i <- 0 until numWrite) {
    for (j <- i+1 until numWrite) {
      assert(!(io.data.wen(i) && io.data.wen(j) && io.data.waddr(i) === io.data.waddr(j)))
    }
  }
  for (i <- 0 until numWrite) {
    for (j <- i+1 until numWrite) {
      assert(!(io.mask.wen(i) && io.mask.wen(j) && io.mask.waddr(i) === io.mask.waddr(j)))
    }
  }

  // parallel fwd logic
  for (i <- 0 until numForward) {
    // every byte lane receives the same pair of entry-select masks
    data16.foreach(_.io.needForward(i) := io.needForward(i))
    // each output vector is driven exactly once, gathered across the lanes
    io.forwardMaskFast(i) := VecInit(data16.map(_.io.forwardValidFast(i)))
    io.forwardMask(i) := VecInit(data16.map(_.io.forwardValid(i)))
    io.forwardData(i) := VecInit(data16.map(_.io.forwardData(i)))
  }
}