/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.mem

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import utils._
import utility._
import xiangshan._
import xiangshan.ExceptionNO._
import xiangshan.frontend.FtqPtr
import xiangshan.backend.fu.FuConfig._
import xiangshan.backend.fu.FuType
import xiangshan.backend.fu.fpu.FPU
import xiangshan.backend.rob.RobLsqIO
import xiangshan.mem.Bundles._
import xiangshan.backend.rob.RobPtr
import xiangshan.backend.Bundles.{MemExuOutput, DynInst}
import xiangshan.backend.fu.FuConfig.LduCfg
import xiangshan.cache.mmu.HasTlbConst
import xiangshan.cache._
import xiangshan.cache.wpu.ReplayCarry

class LoadMisalignBuffer(implicit p: Parameters) extends XSModule
  with HasCircularQueuePtrHelper
  with HasLoadHelper
  with HasTlbConst
{
  private val enqPortNum = LoadPipelineWidth
  private val maxSplitNum = 2

  require(maxSplitNum == 2)

  private val LB = "b00".U(2.W)
  private val LH = "b01".U(2.W)
  private val LW = "b10".U(2.W)
  private val LD = "b11".U(2.W)

  // encoding of how many bytes to shift by or truncate to
  private val BYTE0 = "b000".U(3.W)
  private val BYTE1 = "b001".U(3.W)
  private val BYTE2 = "b010".U(3.W)
  private val BYTE3 = "b011".U(3.W)
  private val BYTE4 = "b100".U(3.W)
  private val BYTE5 = "b101".U(3.W)
  private val BYTE6 = "b110".U(3.W)
  private val BYTE7 = "b111".U(3.W)

  def getMask(sizeEncode: UInt) = LookupTree(sizeEncode, List(
    LB -> 0x1.U, // lb
    LH -> 0x3.U, // lh
    LW -> 0xf.U, // lw
    LD -> 0xff.U // ld
  ))

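  // Shift the raw data right by `shiftEncode` bytes, then keep only the low `truncateEncode`
  // bytes; e.g. shiftEncode = BYTE2 with truncateEncode = BYTE4 yields data(47, 16).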
  def getShiftAndTruncateData(shiftEncode: UInt, truncateEncode: UInt, data: UInt) = {
    val shiftData = LookupTree(shiftEncode, List(
      BYTE0 -> data(63, 0),
      BYTE1 -> data(63, 8),
      BYTE2 -> data(63, 16),
      BYTE3 -> data(63, 24),
      BYTE4 -> data(63, 32),
      BYTE5 -> data(63, 40),
      BYTE6 -> data(63, 48),
      BYTE7 -> data(63, 56)
    ))
    val truncateData = LookupTree(truncateEncode, List(
      BYTE0 -> 0.U(XLEN.W), // can not truncate with 0 byte width
      BYTE1 -> shiftData(7, 0),
      BYTE2 -> shiftData(15, 0),
      BYTE3 -> shiftData(23, 0),
      BYTE4 -> shiftData(31, 0),
      BYTE5 -> shiftData(39, 0),
      BYTE6 -> shiftData(47, 0),
      BYTE7 -> shiftData(55, 0)
    ))
    truncateData(XLEN - 1, 0)
  }

  def selectOldest[T <: LqWriteBundle](valid: Seq[Bool], bits: Seq[T]): (Seq[Bool], Seq[T]) = {
    assert(valid.length == bits.length)
    if (valid.length == 0 || valid.length == 1) {
      (valid, bits)
    } else if (valid.length == 2) {
      val res = Seq.fill(2)(Wire(ValidIO(chiselTypeOf(bits(0)))))
      for (i <- res.indices) {
        res(i).valid := valid(i)
        res(i).bits := bits(i)
      }
      // pick the older request by robIdx, with uopIdx as the tie-breaker
      val oldest = Mux(valid(0) && valid(1),
        Mux(isAfter(bits(0).uop.robIdx, bits(1).uop.robIdx) ||
          (bits(0).uop.robIdx === bits(1).uop.robIdx && bits(0).uop.uopIdx > bits(1).uop.uopIdx), res(1), res(0)),
        Mux(valid(0) && !valid(1), res(0), res(1)))
      (Seq(oldest.valid), Seq(oldest.bits))
    } else {
      val left = selectOldest(valid.take(valid.length / 2), bits.take(bits.length / 2))
      val right = selectOldest(valid.takeRight(valid.length - (valid.length / 2)), bits.takeRight(bits.length - (bits.length / 2)))
      selectOldest(left._1 ++ right._1, left._2 ++ right._2)
    }
  }

  val io = IO(new Bundle() {
    val redirect = Flipped(Valid(new Redirect))
    val enq = Vec(enqPortNum, Flipped(new MisalignBufferEnqIO))
    val rob = Flipped(new RobLsqIO)
    val splitLoadReq = Decoupled(new LsPipelineBundle)
    val splitLoadResp = Flipped(Valid(new LqWriteBundle))
    val writeBack = Decoupled(new MemExuOutput)
    val vecWriteBack = Decoupled(new VecPipelineFeedbackIO(isVStore = false))
    val loadOutValid = Input(Bool())
    val loadVecOutValid = Input(Bool())
    val overwriteExpBuf = Output(new XSBundle {
      val valid = Bool()
      val vaddr = UInt(XLEN.W)
      val isHyper = Bool()
      val gpaddr = UInt(XLEN.W)
      val isForVSnonLeafPTE = Bool()
    })
    val flushLdExpBuff = Output(Bool())
    val loadMisalignFull = Output(Bool())
  })

  io.rob.mmio := 0.U.asTypeOf(Vec(LoadPipelineWidth, Bool()))
  io.rob.uop := 0.U.asTypeOf(Vec(LoadPipelineWidth, new DynInst))

  val req_valid = RegInit(false.B)
  val req = Reg(new LqWriteBundle)

  io.loadMisalignFull := req_valid

  // only one misaligned load is buffered at a time; grant the lowest-indexed valid enqueue port
  (0 until io.enq.length).map{i =>
    if (i == 0) {
      io.enq(0).req.ready := !req_valid && io.enq(0).req.valid
    }
    else {
      io.enq(i).req.ready := !io.enq.take(i).map(_.req.ready).reduce(_ || _) && !req_valid && io.enq(i).req.valid
    }
  }

  val select_req_bit = ParallelPriorityMux(io.enq.map(_.req.valid), io.enq.map(_.req.bits))
  val select_req_valid = io.enq.map(_.req.valid).reduce(_ || _)
  val canEnqValid = !req_valid && !select_req_bit.uop.robIdx.needFlush(io.redirect) && select_req_valid
  when(canEnqValid) {
    req := select_req_bit
    req_valid := true.B
  }

  // buffer control:
  // - s_idle: idle
  // - s_split: split the misaligned load
  // - s_req: issue a split memory access request
  // - s_resp: wait for the response to a split load access request
  // - s_comb_wakeup_rep: merge the data and issue a wakeup load
  // - s_wb: writeback to rob/vecMergeBuffer
  val s_idle :: s_split :: s_req :: s_resp :: s_comb_wakeup_rep :: s_wb :: Nil = Enum(6)
  val bufferState = RegInit(s_idle)
  val splitLoadReqs = RegInit(VecInit(List.fill(maxSplitNum)(0.U.asTypeOf(new LsPipelineBundle))))
  val splitLoadResp = RegInit(VecInit(List.fill(maxSplitNum)(0.U.asTypeOf(new LqWriteBundle))))
  val exceptionVec = RegInit(0.U.asTypeOf(ExceptionVec()))
  val unSentLoads = RegInit(0.U(maxSplitNum.W))
  val curPtr = RegInit(0.U(log2Ceil(maxSplitNum).W))
  val needWakeUpReqsWire = Wire(Bool())
  val needWakeUpWB = RegInit(false.B)
  val data_select = RegEnable(genRdataOH(select_req_bit.uop), 0.U(genRdataOH(select_req_bit.uop).getWidth.W), canEnqValid)

  // whether any split load raised an exception or touched uncache space
  val globalException = RegInit(false.B)
  val globalUncache = RegInit(false.B)

  // debug info
  val globalMMIO = RegInit(false.B)
  val globalNC = RegInit(false.B)

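  // hasException: the split response reports a load exception for an active element, or a
  // debug-mode trigger fired; isUncache: the split access went to MMIO or non-cacheable space.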
  val hasException = io.splitLoadResp.bits.vecActive &&
    ExceptionNO.selectByFu(io.splitLoadResp.bits.uop.exceptionVec, LduCfg).asUInt.orR || TriggerAction.isDmode(io.splitLoadResp.bits.uop.trigger)
  val isUncache = io.splitLoadResp.bits.mmio || io.splitLoadResp.bits.nc
  needWakeUpReqsWire := false.B
  switch(bufferState) {
    is (s_idle) {
      when (req_valid) {
        bufferState := s_split
      }
    }

    is (s_split) {
      bufferState := s_req
    }

    is (s_req) {
      when (io.splitLoadReq.fire) {
        bufferState := s_resp
      }
    }

    is (s_resp) {
      when (io.splitLoadResp.valid) {
        val clearOh = UIntToOH(curPtr)
        when (hasException || isUncache) {
          // commit directly when an exception occurs
          // if any split load reaches uncache space, delegate to software via a loadAddrMisaligned exception
          bufferState := s_wb
          globalException := hasException
          globalUncache := isUncache
          globalMMIO := io.splitLoadResp.bits.mmio
          globalNC := io.splitLoadResp.bits.nc
        } .elsewhen(io.splitLoadResp.bits.rep_info.need_rep || (unSentLoads & ~clearOh).orR) {
          // need replay or still has unsent requests
          bufferState := s_req
        } .otherwise {
          // merge the split load results
          bufferState := s_comb_wakeup_rep
          needWakeUpWB := !req.isvec
        }
      }
    }

    is (s_comb_wakeup_rep) {
      when(!req.isvec) {
        when(io.splitLoadReq.fire) {
          bufferState := s_wb
        }.otherwise {
          bufferState := s_comb_wakeup_rep
        }
        needWakeUpReqsWire := true.B
      } .otherwise {
        bufferState := s_wb
      }

    }

    is (s_wb) {
      when(req.isvec) {
        when(io.vecWriteBack.fire) {
          bufferState := s_idle
          req_valid := false.B
          curPtr := 0.U
          unSentLoads := 0.U
          globalException := false.B
          globalUncache := false.B
          needWakeUpWB := false.B

          globalMMIO := false.B
          globalNC := false.B
        }

      } .otherwise {
        when(io.writeBack.fire) {
          bufferState := s_idle
          req_valid := false.B
          curPtr := 0.U
          unSentLoads := 0.U
          globalException := false.B
          globalUncache := false.B
          needWakeUpWB := false.B

          globalMMIO := false.B
          globalNC := false.B
        }
      }

    }
  }

  val alignedType = Mux(req.isvec, req.alignedType(1, 0), req.uop.fuOpType(1, 0))
  val highAddress = LookupTree(alignedType, List(
    LB -> 0.U,
    LH -> 1.U,
    LW -> 3.U,
    LD -> 7.U
  )) + req.vaddr(4, 0)
  // check whether (vaddr + opSize - 1) and vaddr are in the same 16-byte region
  val cross16BytesBoundary = req_valid && (highAddress(4) =/= req.vaddr(4))
  val aligned16BytesAddr = (req.vaddr >> 4) << 4 // req.vaddr & ~("b1111".U)
  val aligned16BytesSel = req.vaddr(3, 0)

  // meta of the 128-bit load
  val new128Load = WireInit(0.U.asTypeOf(new LsPipelineBundle))
  // meta of split loads
  val lowAddrLoad = WireInit(0.U.asTypeOf(new LsPipelineBundle))
  val highAddrLoad = WireInit(0.U.asTypeOf(new LsPipelineBundle))
  val lowResultShift = RegInit(0.U(3.W)) // how many bytes to shift right in the returned data
  val lowResultWidth = RegInit(0.U(3.W)) // how many bytes to take from the returned data
  val highResultShift = RegInit(0.U(3.W))
  val highResultWidth = RegInit(0.U(3.W))

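  // Example: an lw whose vaddr ends in 0b01 is split into a 4-byte load at vaddr - 1
  // (keeping bytes [3:1] of its result) and a 1-byte load at vaddr + 3; the two pieces
  // are concatenated later in s_comb_wakeup_rep.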
  when (bufferState === s_split) {
    when (!cross16BytesBoundary) {
      assert(false.B, s"There should be no misaligned access that does not cross a 16-byte boundary.")
    } .otherwise {
      // split this unaligned load into `maxSplitNum` aligned loads
      unSentLoads := Fill(maxSplitNum, 1.U(1.W))
      curPtr := 0.U
      lowAddrLoad.uop := req.uop
      lowAddrLoad.uop.exceptionVec(loadAddrMisaligned) := false.B
      lowAddrLoad.fullva := req.fullva
      highAddrLoad.uop := req.uop
      highAddrLoad.uop.exceptionVec(loadAddrMisaligned) := false.B
      highAddrLoad.fullva := req.fullva

      switch (alignedType(1, 0)) {
        is (LB) {
          assert(false.B, "lb should not trigger misalign")
        }

        is (LH) {
          lowAddrLoad.uop.fuOpType := LB
          lowAddrLoad.vaddr := req.vaddr
          lowAddrLoad.mask := 0x1.U << lowAddrLoad.vaddr(3, 0)
          lowResultShift := BYTE0
          lowResultWidth := BYTE1

          highAddrLoad.uop.fuOpType := LB
          highAddrLoad.vaddr := req.vaddr + 1.U
          highAddrLoad.mask := 0x1.U << highAddrLoad.vaddr(3, 0)
          highResultShift := BYTE0
          highResultWidth := BYTE1
        }

        is (LW) {
          switch (req.vaddr(1, 0)) {
            is ("b00".U) {
              assert(false.B, "should not trigger misalign")
            }

            is ("b01".U) {
              lowAddrLoad.uop.fuOpType := LW
              lowAddrLoad.vaddr := req.vaddr - 1.U
              lowAddrLoad.mask := 0xf.U << lowAddrLoad.vaddr(3, 0)
              lowResultShift := BYTE1
              lowResultWidth := BYTE3

              highAddrLoad.uop.fuOpType := LB
              highAddrLoad.vaddr := req.vaddr + 3.U
              highAddrLoad.mask := 0x1.U << highAddrLoad.vaddr(3, 0)
              highResultShift := BYTE0
              highResultWidth := BYTE1
            }

            is ("b10".U) {
              lowAddrLoad.uop.fuOpType := LH
              lowAddrLoad.vaddr := req.vaddr
              lowAddrLoad.mask := 0x3.U << lowAddrLoad.vaddr(3, 0)
              lowResultShift := BYTE0
              lowResultWidth := BYTE2

              highAddrLoad.uop.fuOpType := LH
              highAddrLoad.vaddr := req.vaddr + 2.U
              highAddrLoad.mask := 0x3.U << highAddrLoad.vaddr(3, 0)
              highResultShift := BYTE0
              highResultWidth := BYTE2
            }

            is ("b11".U) {
              lowAddrLoad.uop.fuOpType := LB
              lowAddrLoad.vaddr := req.vaddr
              lowAddrLoad.mask := 0x1.U << lowAddrLoad.vaddr(3, 0)
              lowResultShift := BYTE0
              lowResultWidth := BYTE1

              highAddrLoad.uop.fuOpType := LW
              highAddrLoad.vaddr := req.vaddr + 1.U
              highAddrLoad.mask := 0xf.U << highAddrLoad.vaddr(3, 0)
              highResultShift := BYTE0
              highResultWidth := BYTE3
            }
          }
        }

        is (LD) {
          switch (req.vaddr(2, 0)) {
            is ("b000".U) {
              assert(false.B, "should not trigger misalign")
            }

            is ("b001".U) {
              lowAddrLoad.uop.fuOpType := LD
              lowAddrLoad.vaddr := req.vaddr - 1.U
              lowAddrLoad.mask := 0xff.U << lowAddrLoad.vaddr(3, 0)
              lowResultShift := BYTE1
              lowResultWidth := BYTE7

              highAddrLoad.uop.fuOpType := LB
              highAddrLoad.vaddr := req.vaddr + 7.U
              highAddrLoad.mask := 0x1.U << highAddrLoad.vaddr(3, 0)
              highResultShift := BYTE0
              highResultWidth := BYTE1
            }

            is ("b010".U) {
              lowAddrLoad.uop.fuOpType := LD
              lowAddrLoad.vaddr := req.vaddr - 2.U
              lowAddrLoad.mask := 0xff.U << lowAddrLoad.vaddr(3, 0)
              lowResultShift := BYTE2
              lowResultWidth := BYTE6

              highAddrLoad.uop.fuOpType := LH
              highAddrLoad.vaddr := req.vaddr + 6.U
              highAddrLoad.mask := 0x3.U << highAddrLoad.vaddr(3, 0)
              highResultShift := BYTE0
              highResultWidth := BYTE2
            }

            is ("b011".U) {
              lowAddrLoad.uop.fuOpType := LD
              lowAddrLoad.vaddr := req.vaddr - 3.U
              lowAddrLoad.mask := 0xff.U << lowAddrLoad.vaddr(3, 0)
              lowResultShift := BYTE3
              lowResultWidth := BYTE5

              highAddrLoad.uop.fuOpType := LW
              highAddrLoad.vaddr := req.vaddr + 5.U
              highAddrLoad.mask := 0xf.U << highAddrLoad.vaddr(3, 0)
              highResultShift := BYTE0
              highResultWidth := BYTE3
            }

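            // The remaining doubleword cases follow the same pattern: the low access covers
            // the bytes below the next 16-byte boundary, the high access covers the rest.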
            is ("b100".U) {
              lowAddrLoad.uop.fuOpType := LW
              lowAddrLoad.vaddr := req.vaddr
              lowAddrLoad.mask := 0xf.U << lowAddrLoad.vaddr(3, 0)
              lowResultShift := BYTE0
              lowResultWidth := BYTE4

              highAddrLoad.uop.fuOpType := LW
              highAddrLoad.vaddr := req.vaddr + 4.U
              highAddrLoad.mask := 0xf.U << highAddrLoad.vaddr(3, 0)
              highResultShift := BYTE0
              highResultWidth := BYTE4
            }

            is ("b101".U) {
              lowAddrLoad.uop.fuOpType := LW
              lowAddrLoad.vaddr := req.vaddr - 1.U
              lowAddrLoad.mask := 0xf.U << lowAddrLoad.vaddr(3, 0)
              lowResultShift := BYTE1
              lowResultWidth := BYTE3

              highAddrLoad.uop.fuOpType := LD
              highAddrLoad.vaddr := req.vaddr + 3.U
              highAddrLoad.mask := 0xff.U << highAddrLoad.vaddr(3, 0)
              highResultShift := BYTE0
              highResultWidth := BYTE5
            }

            is ("b110".U) {
              lowAddrLoad.uop.fuOpType := LH
              lowAddrLoad.vaddr := req.vaddr
              lowAddrLoad.mask := 0x3.U << lowAddrLoad.vaddr(3, 0)
              lowResultShift := BYTE0
              lowResultWidth := BYTE2

              highAddrLoad.uop.fuOpType := LD
              highAddrLoad.vaddr := req.vaddr + 2.U
              highAddrLoad.mask := 0xff.U << highAddrLoad.vaddr(3, 0)
              highResultShift := BYTE0
              highResultWidth := BYTE6
            }

            is ("b111".U) {
              lowAddrLoad.uop.fuOpType := LB
              lowAddrLoad.vaddr := req.vaddr
              lowAddrLoad.mask := 0x1.U << lowAddrLoad.vaddr(3, 0)
              lowResultShift := BYTE0
              lowResultWidth := BYTE1

              highAddrLoad.uop.fuOpType := LD
              highAddrLoad.vaddr := req.vaddr + 1.U
              highAddrLoad.mask := 0xff.U << highAddrLoad.vaddr(3, 0)
              highResultShift := BYTE0
              highResultWidth := BYTE7
            }
          }
        }
      }

      splitLoadReqs(0) := lowAddrLoad
      splitLoadReqs(1) := highAddrLoad
    }
    exceptionVec := 0.U.asTypeOf(exceptionVec.cloneType)
  }

  io.splitLoadReq.valid := req_valid && (bufferState === s_req || bufferState === s_comb_wakeup_rep && needWakeUpReqsWire && !req.isvec)
  io.splitLoadReq.bits := splitLoadReqs(curPtr)
  io.splitLoadReq.bits.isvec := req.isvec
  io.splitLoadReq.bits.misalignNeedWakeUp := needWakeUpReqsWire
  io.splitLoadReq.bits.isFinalSplit := curPtr(0) && !needWakeUpReqsWire
  // Restore the information of H extension load
  // bit encoding: | hlv 1 | hlvx 1 | is unsigned(1bit) | size(2bit) |
  val reqIsHlv = LSUOpType.isHlv(req.uop.fuOpType)
  val reqIsHlvx = LSUOpType.isHlvx(req.uop.fuOpType)
  io.splitLoadReq.bits.uop.fuOpType := Mux(req.isvec, req.uop.fuOpType, Cat(reqIsHlv, reqIsHlvx, 0.U(1.W), splitLoadReqs(curPtr).uop.fuOpType(1, 0)))
  io.splitLoadReq.bits.alignedType := Mux(req.isvec, splitLoadReqs(curPtr).uop.fuOpType(1, 0), req.alignedType)

  when (io.splitLoadResp.valid) {
    val resp = io.splitLoadResp.bits
    splitLoadResp(curPtr) := io.splitLoadResp.bits
    when (isUncache) {
      unSentLoads := 0.U
      exceptionVec := ExceptionNO.selectByFu(0.U.asTypeOf(exceptionVec.cloneType), LduCfg)
      // delegate to software
      exceptionVec(loadAddrMisaligned) := true.B
    } .elsewhen (hasException) {
      unSentLoads := 0.U
      LduCfg.exceptionOut.map(no => exceptionVec(no) := exceptionVec(no) || resp.uop.exceptionVec(no))
    } .elsewhen (!io.splitLoadResp.bits.rep_info.need_rep) {
      unSentLoads := unSentLoads & ~UIntToOH(curPtr)
      curPtr := curPtr + 1.U
      exceptionVec := 0.U.asTypeOf(ExceptionVec())
    }
  }

  val combinedData = RegInit(0.U(XLEN.W))

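  // Merge the two split results byte by byte: the first lowResultWidth bytes come from the
  // low access, the remaining bytes from the high access.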
  when (bufferState === s_comb_wakeup_rep) {
    val lowAddrResult = getShiftAndTruncateData(lowResultShift, lowResultWidth, splitLoadResp(0).data)
      .asTypeOf(Vec(XLEN / 8, UInt(8.W)))
    val highAddrResult = getShiftAndTruncateData(highResultShift, highResultWidth, splitLoadResp(1).data)
      .asTypeOf(Vec(XLEN / 8, UInt(8.W)))
    val catResult = Wire(Vec(XLEN / 8, UInt(8.W)))
    (0 until XLEN / 8).map {
      case i => {
        when (i.U < lowResultWidth) {
          catResult(i) := lowAddrResult(i)
        } .otherwise {
          catResult(i) := highAddrResult(i.U - lowResultWidth)
        }
      }
    }
    combinedData := Mux(req.isvec, rdataVecHelper(req.alignedType, (catResult.asUInt)(XLEN - 1, 0)), rdataHelper(req.uop, (catResult.asUInt)(XLEN - 1, 0)))

  }

  io.writeBack.valid := req_valid && (bufferState === s_wb) && (io.splitLoadResp.valid && io.splitLoadResp.bits.misalignNeedWakeUp || globalUncache || globalException) && !io.loadOutValid && !req.isvec
  io.writeBack.bits.uop := req.uop
  io.writeBack.bits.uop.exceptionVec := DontCare
  LduCfg.exceptionOut.map(no => io.writeBack.bits.uop.exceptionVec(no) := (globalUncache || globalException) && exceptionVec(no))
  io.writeBack.bits.uop.rfWen := !globalException && !globalUncache && req.uop.rfWen
  io.writeBack.bits.uop.fuType := FuType.ldu.U
  io.writeBack.bits.uop.flushPipe := false.B
  io.writeBack.bits.uop.replayInst := false.B
  io.writeBack.bits.data := newRdataHelper(data_select, combinedData)
  io.writeBack.bits.isFromLoadUnit := needWakeUpWB
  // Misaligned accesses to uncache space trigger exceptions, so theoretically these signals won't do anything practical.
  // But let's get them assigned correctly.
  io.writeBack.bits.debug.isMMIO := globalMMIO
  io.writeBack.bits.debug.isNC := globalNC
  io.writeBack.bits.debug.isPerfCnt := false.B
  io.writeBack.bits.debug.paddr := req.paddr
  io.writeBack.bits.debug.vaddr := req.vaddr


  // vector output
  io.vecWriteBack.valid := req_valid && (bufferState === s_wb) && !io.loadVecOutValid && req.isvec

  io.vecWriteBack.bits.alignedType := req.alignedType
  io.vecWriteBack.bits.vecFeedback := true.B
  io.vecWriteBack.bits.vecdata.get := combinedData
  io.vecWriteBack.bits.isvec := req.isvec
  io.vecWriteBack.bits.elemIdx := req.elemIdx
  io.vecWriteBack.bits.elemIdxInsideVd.get := req.elemIdxInsideVd
  io.vecWriteBack.bits.mask := req.mask
  io.vecWriteBack.bits.reg_offset.get := 0.U
  io.vecWriteBack.bits.usSecondInv := req.usSecondInv
  io.vecWriteBack.bits.mBIndex := req.mbIndex
  io.vecWriteBack.bits.hit := true.B
  io.vecWriteBack.bits.sourceType := RSFeedbackType.lrqFull
  io.vecWriteBack.bits.trigger := TriggerAction.None
  io.vecWriteBack.bits.flushState := DontCare
  io.vecWriteBack.bits.exceptionVec := ExceptionNO.selectByFu(exceptionVec, VlduCfg)
  io.vecWriteBack.bits.hasException := globalException
  io.vecWriteBack.bits.vaddr := req.fullva
  io.vecWriteBack.bits.vaNeedExt := req.vaNeedExt
  io.vecWriteBack.bits.gpaddr := req.gpaddr
  io.vecWriteBack.bits.isForVSnonLeafPTE := req.isForVSnonLeafPTE
  io.vecWriteBack.bits.mmio := globalMMIO
  io.vecWriteBack.bits.vstart := req.uop.vpu.vstart
  io.vecWriteBack.bits.vecTriggerMask := req.vecTriggerMask
  io.vecWriteBack.bits.nc := globalNC


  val flush = req_valid && req.uop.robIdx.needFlush(io.redirect)

  when (flush) {
    bufferState := s_idle
    req_valid := false.B
    curPtr := 0.U
    unSentLoads := 0.U
    globalException := false.B
    globalUncache := false.B

    globalMMIO := false.B
    globalNC := false.B
  }

  // NOTE: special case (a misaligned load crosses a page boundary and the page fault happens on the higher page)
  // if the exception happens in the higher page's address part, overwrite the loadExceptionBuffer vaddr
  val shouldOverwrite = req_valid && globalException
  val overwriteExpBuf = GatedValidRegNext(shouldOverwrite)
  val overwriteVaddr = RegEnable(
    Mux(
      cross16BytesBoundary && (curPtr === 1.U),
      splitLoadResp(curPtr).vaddr,
      splitLoadResp(curPtr).fullva),
    shouldOverwrite)
  val overwriteGpaddr = RegEnable(splitLoadResp(curPtr).gpaddr, shouldOverwrite)
  val overwriteIsHyper = RegEnable(splitLoadResp(curPtr).isHyper, shouldOverwrite)
  val overwriteIsForVSnonLeafPTE = RegEnable(splitLoadResp(curPtr).isForVSnonLeafPTE, shouldOverwrite)

  // TODO: In theory there is no need to overwrite, but for now the signal is retained in this form;
  // it will be removed after sufficient verification.
  io.overwriteExpBuf.valid := false.B
  io.overwriteExpBuf.vaddr := overwriteVaddr
  io.overwriteExpBuf.isHyper := overwriteIsHyper
  io.overwriteExpBuf.gpaddr := overwriteGpaddr
  io.overwriteExpBuf.isForVSnonLeafPTE := overwriteIsForVSnonLeafPTE

  // when there is no exception and no uncache access, flush the loadExceptionBuffer at s_wb
  val flushLdExpBuff = GatedValidRegNext(req_valid && (bufferState === s_wb) && !(globalUncache || globalException))
  io.flushLdExpBuff := flushLdExpBuff

  XSPerfAccumulate("alloc", RegNext(!req_valid) && req_valid)
  XSPerfAccumulate("flush", flush)
  XSPerfAccumulate("flush_idle", flush && (bufferState === s_idle))
  XSPerfAccumulate("flush_non_idle", flush && (bufferState =/= s_idle))
}