/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.backend

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import freechips.rocketchip.diplomacy.{LazyModule, LazyModuleImp}
import utils._
import utility._
import xiangshan._
import xiangshan.backend.exu._
import xiangshan.backend.fu._
import xiangshan.backend.rob._
import xiangshan.backend.dispatch._
import xiangshan.mem._

/**
  * Stand-in writeback source for the memory block.
  *
  * Advertises one writeback group per load/store exe-unit config
  * (`loadExuConfigs ++ storeExuConfigs`), so that writeback sinks inside the
  * backend can treat the memory writeback like any other source.
  */
class FakeMemBlockWbSource()(implicit p: Parameters) extends LazyModule
  with HasXSParameter with HasWritebackSource {
  lazy val module = new FakeMemBlockWbSourceImp(this)

  override val writebackSourceParams: Seq[WritebackSourceParams] = {
    val params = new WritebackSourceParams
    // every load/store config forms its own single-element group
    params.exuConfigs = (loadExuConfigs ++ storeExuConfigs).map(cfg => Seq(cfg))
    Seq(params)
  }
  override lazy val writebackSourceImp: HasWritebackSourceImp = module
}

/**
  * Implementation of [[FakeMemBlockWbSource]]: a pass-through that connects
  * `io.in` to `io.out` and exposes `io.out` as its writeback source.
  */
class FakeMemBlockWbSourceImp(outer: FakeMemBlockWbSource) extends LazyModuleImp(outer)
  with HasXSParameter
  with HasWritebackSourceImp
{
  val io = IO(new Bundle() {
    val in = Vec(exuParameters.LsExuCnt + exuParameters.StuCnt, Flipped(DecoupledIO(new ExuOutput)))
    val out = Vec(exuParameters.LsExuCnt + exuParameters.StuCnt, DecoupledIO(new ExuOutput))
  })
  override def writebackSource1: Option[Seq[Seq[DecoupledIO[ExuOutput]]]] = Some(Seq(io.out))
  io.out <> io.in
}

/**
  * Merge CtrlBlock, exuBlocks, wbArbiter, wb2Ctrl, etc into 1 module.
  *
  * Builds two `ExuBlock`s (one per entry of `schedulePorts`), a
  * `WbArbiterWrapper`, a `Wb2Ctrl`, a `CtrlBlock` and a
  * [[FakeMemBlockWbSource]], and registers the writeback sinks between them.
  *
  * @param memWbSource writeback source of the memory block.
  *                    NOTE(review): not referenced inside this class body;
  *                    `fakeMemBlockWbSource` is used instead - confirm intent.
  */
class Backend(memWbSource: HasWritebackSource)(implicit p: Parameters) extends LazyModule
  with HasXSParameter
  with HasExuWbHelper
{
  val wbArbiter = LazyModule(new WbArbiterWrapper(exuConfigs, NRIntWritePorts, NRFpWritePorts))
  val intWbPorts = wbArbiter.intWbPorts
  val fpWbPorts = wbArbiter.fpWbPorts

  // TODO: better RS organization
  // generate rs according to number of function units
  require(exuParameters.JmpCnt == 1)
  require(exuParameters.MduCnt <= exuParameters.AluCnt && exuParameters.MduCnt > 0)
  require(exuParameters.FmiscCnt <= exuParameters.FmacCnt && exuParameters.FmiscCnt > 0)
  require(exuParameters.LduCnt == exuParameters.StuCnt) // TODO: remove this limitation

  // one RS every 2 MDUs
  // Outer Seq: one entry per scheduler (first entry: int + mem, second entry: fp).
  val schedulePorts = Seq(
    // exuCfg, numDeq, intFastWakeupTarget, fpFastWakeupTarget
    Seq(
      (AluExeUnitCfg, exuParameters.AluCnt, Seq(AluExeUnitCfg, LdExeUnitCfg, StaExeUnitCfg), Seq()),
      (MulDivExeUnitCfg, exuParameters.MduCnt, Seq(AluExeUnitCfg, MulDivExeUnitCfg), Seq()),
      (JumpCSRExeUnitCfg, 1, Seq(), Seq()),
      (LdExeUnitCfg, exuParameters.LduCnt, Seq(AluExeUnitCfg, LdExeUnitCfg), Seq()),
      (StaExeUnitCfg, exuParameters.StuCnt, Seq(), Seq()),
      (StdExeUnitCfg, exuParameters.StuCnt, Seq(), Seq())
    ),
    Seq(
      (FmacExeUnitCfg, exuParameters.FmacCnt, Seq(), Seq(FmacExeUnitCfg, FmiscExeUnitCfg)),
      (FmiscExeUnitCfg, exuParameters.FmiscCnt, Seq(), Seq())
    )
  )

  // should do outer fast wakeup ports here
  val otherFastPorts = schedulePorts.zipWithIndex.map { case (sche, i) =>
    // all port descriptions that belong to the other schedulers
    // (flatMap instead of reduce: also well-defined when there is no other scheduler)
    val otherCfg = schedulePorts.zipWithIndex.filter(_._2 != i).flatMap(_._1)
    // exe units from this scheduler need fastUops from exeunits
    // (does not depend on the config being processed, so it is computed once per scheduler)
    val outerWakeupInSche = sche.filter(_._1.wakeupFromExu)
    val outerPorts = sche.map(cfg => {
      val intraIntScheOuter = outerWakeupInSche.filter(_._3.contains(cfg._1)).map(_._1)
      val intraFpScheOuter = outerWakeupInSche.filter(_._4.contains(cfg._1)).map(_._1)
      // exe units from other schedulers need fastUop from outside
      val otherIntSource = otherCfg.filter(_._3.contains(cfg._1)).map(_._1)
      val otherFpSource = otherCfg.filter(_._4.contains(cfg._1)).map(_._1)
      val intSource = findInWbPorts(intWbPorts, intraIntScheOuter ++ otherIntSource)
      val fpSource = findInWbPorts(fpWbPorts, intraFpScheOuter ++ otherFpSource)
      getFastWakeupIndex(cfg._1, intSource, fpSource, intWbPorts.length).sorted
    })
    println(s"inter-scheduler wakeup sources for $i: $outerPorts")
    outerPorts
  }

  // allow mdu and fmisc to have 2*numDeq enqueue ports
  // NOTE(review): the tuples are presumably (index of the config inside the
  // scheduler's list above, port index) - confirm against ExuBlock/Scheduler.
  val intDpPorts = (0 until exuParameters.AluCnt).map(i => {
    if (i < exuParameters.JmpCnt) Seq((0, i), (1, i), (2, i))
    else if (i < 2 * exuParameters.MduCnt) Seq((0, i), (1, i))
    else Seq((0, i))
  })
  val lsDpPorts = (0 until exuParameters.LduCnt).map(i => Seq((3, i))) ++
                  (0 until exuParameters.StuCnt).map(i => Seq((4, i))) ++
                  (0 until exuParameters.StuCnt).map(i => Seq((5, i)))
  val fpDpPorts = (0 until exuParameters.FmacCnt).map(i => {
    if (i < 2 * exuParameters.FmiscCnt) Seq((0, i), (1, i))
    else Seq((0, i))
  })

  val dispatchPorts = Seq(intDpPorts ++ lsDpPorts, fpDpPorts)

  // Per-scheduler settings, indexed like `schedulePorts`.
  val outIntRfReadPorts = Seq(0, 0)
  val outFpRfReadPorts = Seq(0, StorePipelineWidth)
  val hasIntRf = Seq(true, false)
  val hasFpRf = Seq(false, true)

  val exuBlocks = schedulePorts.zip(dispatchPorts).zip(otherFastPorts).zipWithIndex.map {
    case (((sche, disp), other), i) =>
      LazyModule(new ExuBlock(sche, disp, intWbPorts, fpWbPorts, other, outIntRfReadPorts(i), outFpRfReadPorts(i), hasIntRf(i), hasFpRf(i)))
  }

  val fakeMemBlockWbSource = LazyModule(new FakeMemBlockWbSource())

  // wb2Ctrl collects the writeback of all exu blocks plus the (fake) memory source;
  // ctrlBlock in turn receives writeback from wb2Ctrl and wbArbiter.
  val wb2Ctrl = LazyModule(new Wb2Ctrl(exuConfigs))
  wb2Ctrl.addWritebackSink(exuBlocks :+ fakeMemBlockWbSource)
  val dpExuConfigs = exuBlocks.flatMap(_.scheduler.dispatch2.map(_.configs))
  val ctrlBlock = LazyModule(new CtrlBlock(dpExuConfigs))
  val writebackSources = Seq(Seq(wb2Ctrl), Seq(wbArbiter))
  writebackSources.foreach(s => ctrlBlock.addWritebackSink(s))

  lazy val module = new BackendImp(this)
}

/**
  * Module implementation of [[Backend]].
  *
  * Declares the backend's ports towards the memory block (`io.memBlock`), the
  * frontend (`io.frontend`) and the CSR-related top-level signals, and wires
  * them to the CtrlBlock, the exu blocks, the writeback arbiter and Wb2Ctrl
  * created by `outer`.
  *
  * The code indexes `exuBlocks(0)` and `exuBlocks(1)` directly, i.e. it relies
  * on `outer` creating exactly the two blocks described by `schedulePorts`.
  */
class BackendImp(outer: Backend)(implicit p: Parameters) extends LazyModuleImp(outer)
  with HasXSParameter
{
  val io = IO(new Bundle() {
    val hartId = Input(UInt(64.W))
    val cpu_halt = Output(Bool())

    val memBlock = new Bundle() { // TODO: use class
      val redirect = ValidIO(new Redirect)
      val issue = Vec(exuParameters.LsExuCnt + exuParameters.StuCnt, DecoupledIO(new ExuInput))
      val loadFastMatch = Vec(exuParameters.LduCnt, Output(UInt(exuParameters.LduCnt.W)))
      val loadFastFuOpType = Vec(exuParameters.LduCnt, Output(FuOpType()))
      val loadFastImm = Vec(exuParameters.LduCnt, Output(UInt(12.W)))
      // the feedback bundle is built with IssQueSize doubled in the parameters
      val rsfeedback = Vec(exuParameters.LsExuCnt, Flipped(new MemRSFeedbackIO()(p.alter((site, here, up) => {
        case XSCoreParamsKey => up(XSCoreParamsKey).copy(
          IssQueSize = IssQueSize * 2
        )
      }))))
      val loadPc = Vec(exuParameters.LduCnt, Output(UInt(VAddrBits.W)))
      val storePc = Vec(exuParameters.StuCnt, Output(UInt(VAddrBits.W)))
      val stIssuePtr = Input(new SqPtr())
      val writeback = Vec(exuParameters.LsExuCnt + exuParameters.StuCnt, Flipped(DecoupledIO(new ExuOutput)))
      val s3_delayed_load_error = Vec(exuParameters.LduCnt, Input(Bool()))
      val otherFastWakeup = Vec(exuParameters.LduCnt + 2 * exuParameters.StuCnt, Flipped(ValidIO(new MicroOp)))
      val stIn = Vec(exuParameters.StuCnt, Flipped(ValidIO(new ExuInput)))
      val memoryViolation = Flipped(ValidIO(new Redirect))
      val sfence = Output(new SfenceBundle)
      val tlbCsr = Output(new TlbCsrBundle)
      val fenceToSbuffer = new FenceToSbuffer
      val enqLsq = Flipped(new LsqEnqIO)
      val lsqio = new Bundle {
        val exceptionAddr = Flipped(new ExceptionAddrIO) // to csr
        val rob = new RobLsqIO // rob to lsq
        val lqCanAccept = Input(Bool())
        val sqCanAccept = Input(Bool())
      }
      val csrCtrl = new CustomCSRCtrlIO
      val lqCancelCnt = Input(UInt(log2Up(VirtualLoadQueueSize + 1).W))
      val sqCancelCnt = Input(UInt(log2Up(StoreQueueSize + 1).W))
      val scommit = Input(UInt(log2Ceil(EnsbufferWidth + 1).W))
      val lcommit = Input(UInt(log2Up(CommitWidth + 1).W))
      val debug_ls = Flipped(new DebugLSIO)
      val lsTopdownInfo = Vec(exuParameters.LduCnt, Input(new LsTopdownInfo))
    }

    val frontend = new Bundle() { // TODO: use class
      val frontend2Ctrl = Flipped(new FrontendToCtrlIO)
      val sfence = Output(new SfenceBundle)
      val tlbCsr = Output(new TlbCsrBundle)
      val csrCtrl = Output(new CustomCSRCtrlIO)
      val fencei = Output(Bool())
    }

    // CSR related
    val perf = Input(new PerfCounterIO)
    val externalInterrupt = new ExternalInterruptIO
    val distributedUpdate = Vec(2, Flipped(new DistributedCSRUpdateReq))

    val l2_pf_enable = Output(Bool())

    val debugTopDown = new Bundle {
      val fromRob = new RobCoreTopDownIO
      val fromCore = new CoreDispatchTopDownIO
    }
    val debugRolling = new RobDebugRollingIO
  })

  private val ctrlBlock = outer.ctrlBlock.module
  private val wb2Ctrl = outer.wb2Ctrl.module
  private val exuBlocks = outer.exuBlocks.map(_.module)
  private val wbArbiter = outer.wbArbiter.module

  // short aliases for the two large IO sub-bundles
  val mem = io.memBlock
  val frontend = io.frontend

  // memory writeback enters the backend through the pass-through source
  outer.fakeMemBlockWbSource.module.io.in <> mem.writeback

  ctrlBlock.io.hartId := io.hartId
  exuBlocks.foreach(_.io.hartId := io.hartId)
  wbArbiter.io.hartId := io.hartId

  io.cpu_halt := ctrlBlock.io.cpu_halt

  wbArbiter.io.redirect <> ctrlBlock.io.redirect

  // function-unit writeback of all exu blocks followed by the memory writeback;
  // the order has to line up with exuConfigs (checked by length below)
  val allWriteback = exuBlocks.flatMap(_.io.fuWriteback) ++ outer.fakeMemBlockWbSource.module.io.out
  require(exuConfigs.length == allWriteback.length, s"${exuConfigs.length} != ${allWriteback.length}")
  wbArbiter.io.in <> allWriteback
  val rfWriteback = wbArbiter.io.out

  // memblock error exception writeback, 1 cycle after normal writeback
  wb2Ctrl.io.s3_delayed_load_error <> mem.s3_delayed_load_error

  wb2Ctrl.io.redirect <> ctrlBlock.io.redirect
  outer.wb2Ctrl.generateWritebackIO()

  // exactly one exu block may host the Jump/CSR unit; CSR and fence IO are taken from it
  require(exuBlocks.count(_.fuConfigs.map(_._1).contains(JumpCSRExeUnitCfg)) == 1)
  val csrFenceMod = exuBlocks.filter(_.fuConfigs.map(_._1).contains(JumpCSRExeUnitCfg)).head
  val csrioIn = csrFenceMod.io.fuExtra.csrio.get
  val fenceio = csrFenceMod.io.fuExtra.fenceio.get

  ctrlBlock.io.frontend <> frontend.frontend2Ctrl
  frontend.sfence <> fenceio.sfence
  frontend.tlbCsr <> csrioIn.tlb
  frontend.csrCtrl <> csrioIn.customCtrl
  frontend.fencei := fenceio.fencei

  ctrlBlock.io.csrCtrl <> csrioIn.customCtrl
  // blocks containing at least one unit that can redirect, in reversed block order
  val redirectBlocks = exuBlocks.reverse.filter(_.fuConfigs.exists(_._1.hasRedirect))
  ctrlBlock.io.exuRedirect <> redirectBlocks.flatMap(_.io.fuExtra.exuRedirect)
  ctrlBlock.io.stIn <> mem.stIn
  ctrlBlock.io.memoryViolation <> mem.memoryViolation
  exuBlocks.head.io.scheExtra.enqLsq.get <> mem.enqLsq
  exuBlocks.foreach(b => {
    b.io.scheExtra.lcommit := mem.lcommit
    b.io.scheExtra.scommit := mem.scommit
    b.io.scheExtra.lqCancelCnt := mem.lqCancelCnt
    b.io.scheExtra.sqCancelCnt := mem.sqCancelCnt
  })
  // NOTE(review): sourceModules is not used below in this class; kept because it is a public member
  val sourceModules = outer.writebackSources.map(_.map(_.module.asInstanceOf[HasWritebackSourceImp]))
  outer.ctrlBlock.generateWritebackIO()

  // fast uops: per block all but the last numOutFu entries of fastUopOut, then the memory block's
  val allFastUop = exuBlocks.flatMap(b => b.io.fastUopOut.dropRight(b.numOutFu)) ++ mem.otherFastWakeup
  require(allFastUop.length == exuConfigs.length, s"${allFastUop.length} != ${exuConfigs.length}")
  val intFastUop = allFastUop.zip(exuConfigs).filter(_._2.writeIntRf).map(_._1)
  val fpFastUop = allFastUop.zip(exuConfigs).filter(_._2.writeFpRf).map(_._1)
  // one fast uop per arbiter connection, taken from the first source of that connection
  val intFastUop1 = outer.wbArbiter.intConnections.map(c => intFastUop(c.head))
  val fpFastUop1 = outer.wbArbiter.fpConnections.map(c => fpFastUop(c.head))
  val allFastUop1 = intFastUop1 ++ fpFastUop1

  ctrlBlock.io.dispatch <> exuBlocks.flatMap(_.io.in)
  ctrlBlock.io.rsReady := exuBlocks.flatMap(_.io.scheExtra.rsReady)
  ctrlBlock.io.enqLsq <> mem.enqLsq
  ctrlBlock.io.lqDeq := mem.lcommit
  ctrlBlock.io.sqDeq := mem.scommit
  ctrlBlock.io.lqCanAccept := mem.lsqio.lqCanAccept
  ctrlBlock.io.sqCanAccept := mem.lsqio.sqCanAccept
  ctrlBlock.io.lqCancelCnt := mem.lqCancelCnt
  ctrlBlock.io.sqCancelCnt := mem.sqCancelCnt
  ctrlBlock.io.robHeadLsIssue := exuBlocks.map(_.io.scheExtra.robHeadLsIssue).reduce(_ || _)

  // block 0 reads the fp register file / fp state through block 1
  exuBlocks(0).io.scheExtra.fpRfReadIn.get <> exuBlocks(1).io.scheExtra.fpRfReadOut.get
  exuBlocks(0).io.scheExtra.fpStateReadIn.get <> exuBlocks(1).io.scheExtra.fpStateReadOut.get

  for((c, e) <- ctrlBlock.io.ld_pc_read.zip(exuBlocks(0).io.issue.get)){
    // read load pc at load s0
    c.ptr := e.bits.uop.cf.ftqPtr
    c.offset := e.bits.uop.cf.ftqOffset
  }
  // return load pc at load s2
  mem.loadPc <> VecInit(ctrlBlock.io.ld_pc_read.map(_.data))

  // issue ports after the first LduCnt entries are paired with the store pc read ports
  for((c, e) <- ctrlBlock.io.st_pc_read.zip(exuBlocks(0).io.issue.get.drop(exuParameters.LduCnt))){
    // read store pc at store s0
    c.ptr := e.bits.uop.cf.ftqPtr
    c.offset := e.bits.uop.cf.ftqOffset
  }
  // return store pc at store s2
  mem.storePc <> VecInit(ctrlBlock.io.st_pc_read.map(_.data))

  mem.issue <> exuBlocks(0).io.issue.get
  // By default, instructions do not have exceptions when they enter the function units.
  // Keep this after the bulk connection above so that it is the later statement.
  mem.issue.foreach(_.bits.uop.clearExceptions())
  exuBlocks(0).io.scheExtra.loadFastMatch.get <> mem.loadFastMatch
  exuBlocks(0).io.scheExtra.loadFastFuOpType.get <> mem.loadFastFuOpType
  exuBlocks(0).io.scheExtra.loadFastImm.get <> mem.loadFastImm

  // the last StuCnt issue ports of block 0 (the StdExeUnitCfg entries come last in schedulePorts)
  val stdIssue = exuBlocks(0).io.issue.get.takeRight(exuParameters.StuCnt)
  exuBlocks.map(_.io).foreach { exu =>
    exu.redirect <> ctrlBlock.io.redirect
    exu.allocPregs <> ctrlBlock.io.allocPregs
    exu.rfWriteback <> rfWriteback
    exu.fastUopIn <> allFastUop1
    exu.scheExtra.jumpPc <> ctrlBlock.io.jumpPc
    exu.scheExtra.jalr_target <> ctrlBlock.io.jalr_target
    exu.scheExtra.stIssuePtr <> mem.stIssuePtr
    exu.scheExtra.debug_fp_rat <> ctrlBlock.io.debug_fp_rat
    exu.scheExtra.debug_int_rat <> ctrlBlock.io.debug_int_rat
    exu.scheExtra.robDeqPtr := ctrlBlock.io.robDeqPtr
    exu.scheExtra.memWaitUpdateReq.staIssue.zip(mem.stIn).foreach{case (sink, src) => {
      sink.bits := src.bits
      sink.valid := src.valid
    }}
    exu.scheExtra.memWaitUpdateReq.stdIssue.zip(stdIssue).foreach{case (sink, src) => {
      sink.valid := src.valid
      sink.bits := src.bits
    }}
  }

  XSPerfHistogram("fastIn_count", PopCount(allFastUop1.map(_.valid)), true.B, 0, allFastUop1.length, 1)
  XSPerfHistogram("wakeup_count", PopCount(rfWriteback.map(_.valid)), true.B, 0, rfWriteback.length, 1)

  // the last numRs perf events of each block go to perfEventsRs, the rest to perfEventsEu0/1
  ctrlBlock.perfinfo.perfEventsEu0 := exuBlocks(0).getPerf.dropRight(outer.exuBlocks(0).scheduler.numRs)
  ctrlBlock.perfinfo.perfEventsEu1 := exuBlocks(1).getPerf.dropRight(outer.exuBlocks(1).scheduler.numRs)
  ctrlBlock.perfinfo.perfEventsRs := outer.exuBlocks.flatMap(b => b.module.getPerf.takeRight(b.scheduler.numRs))

  csrioIn.hartId <> io.hartId

  val perf = WireInit(io.perf) // other perf events are assigned outside the backend
  perf.retiredInstr <> ctrlBlock.io.robio.toCSR.perfinfo.retiredInstr
  perf.ctrlInfo <> ctrlBlock.io.perfInfo.ctrlInfo
  perf.perfEventsCtrl <> ctrlBlock.getPerf
  csrioIn.perf <> perf

  csrioIn.fpu.fflags <> ctrlBlock.io.robio.toCSR.fflags
  csrioIn.fpu.isIllegal := false.B
  csrioIn.fpu.dirty_fs <> ctrlBlock.io.robio.toCSR.dirty_fs
  csrioIn.fpu.frm <> exuBlocks(1).io.fuExtra.frm.get
  csrioIn.exception <> ctrlBlock.io.robio.exception
  csrioIn.isXRet <> ctrlBlock.io.robio.toCSR.isXRet
  csrioIn.trapTarget <> ctrlBlock.io.robio.toCSR.trapTarget
  csrioIn.interrupt <> ctrlBlock.io.robio.toCSR.intrBitSet
  csrioIn.wfi_event <> ctrlBlock.io.robio.toCSR.wfiEvent
  csrioIn.memExceptionVAddr <> mem.lsqio.exceptionAddr.vaddr

  csrioIn.externalInterrupt := io.externalInterrupt

  csrioIn.distributedUpdate := io.distributedUpdate

  mem.sfence <> fenceio.sfence
  mem.fenceToSbuffer <> fenceio.sbuffer

  mem.redirect <> ctrlBlock.io.redirect
  mem.rsfeedback <> exuBlocks(0).io.scheExtra.feedback.get
  mem.csrCtrl <> csrioIn.customCtrl
  mem.tlbCsr <> csrioIn.tlb
  mem.lsqio.rob <> ctrlBlock.io.robio.lsq
  mem.lsqio.exceptionAddr.isStore := CommitType.lsInstIsStore(ctrlBlock.io.robio.exception.bits.uop.ctrl.commitType)
  mem.debug_ls <> ctrlBlock.io.robio.debug_ls
  mem.lsTopdownInfo <> ctrlBlock.io.robio.lsTopdownInfo

  // if l2 prefetcher use stream prefetch, it should be placed in XSCore
  io.l2_pf_enable := csrioIn.customCtrl.l2_pf_enable

  io.debugTopDown.fromRob := ctrlBlock.io.debugTopDown.fromRob
  ctrlBlock.io.debugTopDown.fromCore := io.debugTopDown.fromCore
  io.debugRolling := ctrlBlock.io.debugRolling

  // reset tree over every exu block except the first, the writeback arbiter and the ctrl block
  val resetTree = ResetGenNode(
    exuBlocks.tail.map(m => ModuleNode(m))
      :+ ModuleNode(wbArbiter)
      :+ ModuleNode(ctrlBlock)
  )
  ResetGen(resetTree, reset, !p(DebugOptionsKey).FPGAPlatform)
}