/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.mem

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import utils._
import utility._
import xiangshan._
import xiangshan.ExceptionNO._
import xiangshan.frontend.FtqPtr
import xiangshan.backend.fu.FuConfig._
import xiangshan.backend.fu.fpu.FPU
import xiangshan.backend.rob.RobLsqIO
import xiangshan.backend.rob.RobPtr
import xiangshan.backend.Bundles._
import xiangshan.backend.fu.FuConfig.StaCfg
import xiangshan.backend.fu.FuType.isVStore
import xiangshan.mem.Bundles._
import xiangshan.cache._
import xiangshan.cache.wpu.ReplayCarry

class StoreMisalignBuffer(implicit p: Parameters) extends XSModule
  with HasCircularQueuePtrHelper
{
  private val enqPortNum = StorePipelineWidth
  private val maxSplitNum = 2

  require(maxSplitNum == 2)

  private val SB = "b00".U(2.W)
  private val SH = "b01".U(2.W)
  private val SW = "b10".U(2.W)
  private val SD = "b11".U(2.W)

  // encode of how many bytes to shift or truncate
  private val BYTE0 = "b000".U(3.W)
  private val BYTE1 = "b001".U(3.W)
  private val BYTE2 = "b010".U(3.W)
  private val BYTE3 = "b011".U(3.W)
  private val BYTE4 = "b100".U(3.W)
  private val BYTE5 = "b101".U(3.W)
  private val BYTE6 = "b110".U(3.W)
  private val BYTE7 = "b111".U(3.W)

  def getMask(sizeEncode: UInt) = LookupTree(sizeEncode, List(
    SB -> 0x1.U,
    SH -> 0x3.U,
    SW -> 0xf.U,
    SD -> 0xff.U
  ))
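  // Select the oldest request among the valid inputs and carry the corresponding index along.
  // "Oldest" means the smallest robIdx; when the robIdx is equal, the smaller uopIdx wins.
  // Inputs wider than two entries are reduced recursively, tournament style.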
  def selectOldest[T <: LsPipelineBundle](valid: Seq[Bool], bits: Seq[T], index: Seq[UInt]): (Seq[Bool], Seq[T], Seq[UInt]) = {
    assert(valid.length == bits.length)
    if (valid.length == 0 || valid.length == 1) {
      (valid, bits, index)
    } else if (valid.length == 2) {
      val res = Seq.fill(2)(Wire(ValidIO(chiselTypeOf(bits(0)))))
      val resIndex = Seq.fill(2)(Wire(chiselTypeOf(index(0))))
      for (i <- res.indices) {
        res(i).valid := valid(i)
        res(i).bits := bits(i)
        resIndex(i) := index(i)
      }
      val oldest = Mux(valid(0) && valid(1),
        Mux(isAfter(bits(0).uop.robIdx, bits(1).uop.robIdx) ||
          (isNotBefore(bits(0).uop.robIdx, bits(1).uop.robIdx) && bits(0).uop.uopIdx > bits(1).uop.uopIdx), res(1), res(0)),
        Mux(valid(0) && !valid(1), res(0), res(1)))

      val oldestIndex = Mux(valid(0) && valid(1),
        Mux(isAfter(bits(0).uop.robIdx, bits(1).uop.robIdx) ||
          (bits(0).uop.robIdx === bits(1).uop.robIdx && bits(0).uop.uopIdx > bits(1).uop.uopIdx), resIndex(1), resIndex(0)),
        Mux(valid(0) && !valid(1), resIndex(0), resIndex(1)))
      (Seq(oldest.valid), Seq(oldest.bits), Seq(oldestIndex))
    } else {
      val left = selectOldest(valid.take(valid.length / 2), bits.take(bits.length / 2), index.take(index.length / 2))
      val right = selectOldest(valid.takeRight(valid.length - (valid.length / 2)), bits.takeRight(bits.length - (bits.length / 2)), index.takeRight(index.length - (index.length / 2)))
      selectOldest(left._1 ++ right._1, left._2 ++ right._2, left._3 ++ right._3)
    }
  }

  val io = IO(new Bundle() {
    val redirect = Flipped(Valid(new Redirect))
    val enq = Vec(enqPortNum, Flipped(new MisalignBufferEnqIO))
    val rob = Flipped(new RobLsqIO)
    val splitStoreReq = Decoupled(new LsPipelineBundle)
    val splitStoreResp = Flipped(Valid(new SqWriteBundle))
    val writeBack = Decoupled(new MemExuOutput)
    val vecWriteBack = Vec(VecStorePipelineWidth, Decoupled(new VecPipelineFeedbackIO(isVStore = true)))
    val storeOutValid = Input(Bool())
    val storeVecOutValid = Input(Bool())
    val overwriteExpBuf = Output(new XSBundle {
      val valid = Bool()
      val vaddr = UInt(XLEN.W)
      val isHyper = Bool()
      val gpaddr = UInt(XLEN.W)
      val isForVSnonLeafPTE = Bool()
    })
    val sqControl = new StoreMaBufToSqControlIO

    val toVecStoreMergeBuffer = Vec(VecStorePipelineWidth, new StoreMaBufToVecStoreMergeBufferIO)
    val full = Bool()
  })

  io.rob.mmio := 0.U.asTypeOf(Vec(LoadPipelineWidth, Bool()))
  io.rob.uop := 0.U.asTypeOf(Vec(LoadPipelineWidth, new DynInst))

  class StoreMisalignBufferEntry(implicit p: Parameters) extends LsPipelineBundle {
    val portIndex = UInt(log2Up(enqPortNum).W)
  }
  val req_valid = RegInit(false.B)
  val req = Reg(new StoreMisalignBufferEntry)

  val cross4KBPageBoundary = Wire(Bool())
  val needFlushPipe = RegInit(false.B)

  // buffer control:
  //  - s_idle:  idle
  //  - s_split: split the misaligned store into aligned stores
  //  - s_req:   send a split store to the sta pipeline
  //  - s_resp:  wait for the sta result of the split store
  //  - s_wb:    write back to the ROB / vecMergeBuffer
  //  - s_block: wait for this instruction to reach the head of the ROB
  val s_idle :: s_split :: s_req :: s_resp :: s_wb :: s_block :: Nil = Enum(6)
  val bufferState = RegInit(s_idle)

  // enqueue
  // s1: pick the oldest incoming request
  val s1_req = VecInit(io.enq.map(_.req.bits))
  val s1_valid = VecInit(io.enq.map(x => x.req.valid))

  val s1_index = (0 until io.enq.length).map(_.asUInt)
  val reqSel = selectOldest(s1_valid, s1_req, s1_index)

  val reqSelValid = reqSel._1(0)
  val reqSelBits = reqSel._2(0)
  val reqSelPort = reqSel._3(0)

  val reqRedirect = reqSelBits.uop.robIdx.needFlush(io.redirect)

  val canEnq = !req_valid && !reqRedirect && reqSelValid
  val robMatch = req_valid && io.rob.pendingst && (io.rob.pendingPtr === req.uop.robIdx)

  val s2_canEnq = GatedRegNext(canEnq)
  val s2_reqSelPort = GatedRegNext(reqSelPort)
  val misalign_can_split = Wire(Bool())
  misalign_can_split := Mux(s2_canEnq, (0 until enqPortNum).map {
    case i => !io.enq(i).revoke && s2_reqSelPort === i.U
  }.reduce(_|_), GatedRegNext(misalign_can_split))

  when(canEnq) {
    connectSamePort(req, reqSelBits)
    req.portIndex := reqSelPort
    req_valid := true.B
  }
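  // While the buffered store crosses a 4KB page boundary and is still in s_idle, an incoming
  // misaligned store that is older (smaller robIdx / uopIdx) takes over the buffer entry.
  // needFlushPipe is raised so that flushPipe is set on this store's writeback, presumably so
  // the displaced younger store is re-fetched and re-executed afterwards.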
  val cross4KBPageEnq = WireInit(false.B)
  when (cross4KBPageBoundary && !reqRedirect) {
    when(
      reqSelValid &&
      (isAfter(req.uop.robIdx, reqSelBits.uop.robIdx) || (isNotBefore(req.uop.robIdx, reqSelBits.uop.robIdx) && req.uop.uopIdx > reqSelBits.uop.uopIdx)) &&
      bufferState === s_idle
    ) {
      connectSamePort(req, reqSelBits)
      req.portIndex := reqSelPort
      cross4KBPageEnq := true.B
      needFlushPipe := true.B
    } .otherwise {
      req := req
      cross4KBPageEnq := false.B
    }
  }

  val reqSelCanEnq = UIntToOH(reqSelPort)

  io.enq.zipWithIndex.map{
    case (reqPort, index) => reqPort.req.ready := reqSelCanEnq(index) && (!req_valid || cross4KBPageBoundary && cross4KBPageEnq)
  }

  io.toVecStoreMergeBuffer.zipWithIndex.map{
    case (toStMB, index) => {
      toStMB.flush := req_valid && cross4KBPageBoundary && cross4KBPageEnq && UIntToOH(req.portIndex)(index)
      toStMB.mbIndex := req.mbIndex
    }
  }
  io.full := req_valid

  // split store bookkeeping
  val splitStoreReqs = RegInit(VecInit(List.fill(maxSplitNum)(0.U.asTypeOf(new LsPipelineBundle))))
  val splitStoreResp = RegInit(VecInit(List.fill(maxSplitNum)(0.U.asTypeOf(new SqWriteBundle))))
  val isCrossPage = RegInit(false.B)
  val exceptionVec = RegInit(0.U.asTypeOf(ExceptionVec()))
  val unSentStores = RegInit(0.U(maxSplitNum.W))
  val unWriteStores = RegInit(0.U(maxSplitNum.W))
  val curPtr = RegInit(0.U(log2Ceil(maxSplitNum).W))

  // whether any split store raised an exception or hit an uncache region
  val globalException = RegInit(false.B)
  val globalUncache = RegInit(false.B)

  // debug info
  val globalMMIO = RegInit(false.B)
  val globalNC = RegInit(false.B)

  val hasException = io.splitStoreResp.bits.vecActive && !io.splitStoreResp.bits.need_rep &&
    ExceptionNO.selectByFu(io.splitStoreResp.bits.uop.exceptionVec, StaCfg).asUInt.orR || TriggerAction.isDmode(io.splitStoreResp.bits.uop.trigger)
  val isUncache = (io.splitStoreResp.bits.mmio || io.splitStoreResp.bits.nc) && !io.splitStoreResp.bits.need_rep

  io.sqControl.toStoreQueue.crossPageWithHit := io.sqControl.toStoreMisalignBuffer.sqPtr === req.uop.sqIdx && isCrossPage
  io.sqControl.toStoreQueue.crossPageCanDeq := !isCrossPage || bufferState === s_block
  io.sqControl.toStoreQueue.paddr := Cat(splitStoreResp(1).paddr(splitStoreResp(1).paddr.getWidth - 1, 3), 0.U(3.W))

  io.sqControl.toStoreQueue.withSameUop := io.sqControl.toStoreMisalignBuffer.uop.robIdx === req.uop.robIdx && io.sqControl.toStoreMisalignBuffer.uop.uopIdx === req.uop.uopIdx && req.isvec && robMatch && isCrossPage
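  // State transitions. A store that crosses a 4KB page boundary is only split once it reaches
  // the head of the ROB (robMatch); a misalignment contained within one page is split as soon
  // as the enqueued request survives the revoke check (misalign_can_split).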
  // state transition
  switch(bufferState) {
    is (s_idle) {
      when(cross4KBPageBoundary && misalign_can_split) {
        when(robMatch) {
          bufferState := s_split
          isCrossPage := true.B
        }
      } .otherwise {
        when (req_valid && misalign_can_split) {
          bufferState := s_split
          isCrossPage := false.B
        }
      }
    }

    is (s_split) {
      bufferState := s_req
    }

    is (s_req) {
      when (io.splitStoreReq.fire) {
        bufferState := s_resp
      }
    }

    is (s_resp) {
      val needDelay = WireInit(false.B)

      when (io.splitStoreResp.valid) {
        val clearOh = UIntToOH(curPtr)
        when (hasException || isUncache) {
          // commit directly when an exception occurs
          // if any split store reaches mmio space, delegate to software via a storeAddrMisaligned exception
          bufferState := s_wb
          globalException := hasException
          globalUncache := isUncache
          globalMMIO := io.splitStoreResp.bits.mmio
          globalNC := io.splitStoreResp.bits.nc
        } .elsewhen(io.splitStoreResp.bits.need_rep || (unSentStores & (~clearOh).asUInt).orR) {
          // needs replay or still has unsent requests
          bufferState := s_req
        } .otherwise {
          // got the result; go on to compute data and control the store queue.
          // Wait RAWTotalDelayCycles so that the misaligned writeback lines up with a possible RAW rollback.
          needDelay := true.B
          bufferState := s_resp
        }
      }

      when (RegNextN(needDelay, RAWTotalDelayCycles)) {
        bufferState := s_wb
      }
    }

    is (s_wb) {
      when (req.isvec) {
        when (io.vecWriteBack.map(x => x.fire).reduce( _ || _)) {
          bufferState := s_idle
          req_valid := false.B
          curPtr := 0.U
          unSentStores := 0.U
          unWriteStores := 0.U
          globalException := false.B
          globalUncache := false.B
          isCrossPage := false.B
          needFlushPipe := false.B

          globalMMIO := false.B
          globalNC := false.B
        }

      }.otherwise {
        when (io.writeBack.fire && (!isCrossPage || globalUncache || globalException)) {
          bufferState := s_idle
          req_valid := false.B
          curPtr := 0.U
          unSentStores := 0.U
          unWriteStores := 0.U
          globalException := false.B
          globalUncache := false.B
          isCrossPage := false.B
          needFlushPipe := false.B

          globalMMIO := false.B
          globalNC := false.B
        } .elsewhen(io.writeBack.fire && isCrossPage) {
          bufferState := s_block
        } .otherwise {
          bufferState := s_wb
        }

      }
    }

    is (s_block) {
      when (io.sqControl.toStoreMisalignBuffer.doDeq) {
        bufferState := s_idle
        req_valid := false.B
        curPtr := 0.U
        unSentStores := 0.U
        unWriteStores := 0.U
        globalException := false.B
        globalUncache := false.B
        isCrossPage := false.B
        needFlushPipe := false.B

        globalMMIO := false.B
        globalNC := false.B
      }
    }
  }

  val alignedType = Mux(req.isvec, req.alignedType(1,0), req.uop.fuOpType(1, 0))

  val highAddress = LookupTree(alignedType, List(
    SB -> 0.U,
    SH -> 1.U,
    SW -> 3.U,
    SD -> 7.U
  )) + req.vaddr(4, 0)

  val highPageAddress = LookupTree(alignedType, List(
    SB -> 0.U,
    SH -> 1.U,
    SW -> 3.U,
    SD -> 7.U
  )) + req.vaddr(12, 0)
  // check whether vaddr and (vaddr + opSize - 1) fall into the same 16-byte region
  val cross16BytesBoundary = req_valid && (highAddress(4) =/= req.vaddr(4))
  cross4KBPageBoundary := req_valid && (highPageAddress(12) =/= req.vaddr(12))
  val aligned16BytesAddr = (req.vaddr >> 4) << 4 // req.vaddr & ~("b1111".U)
  val aligned16BytesSel = req.vaddr(3, 0)

  // meta of the 128-bit store
  val new128Store = WireInit(0.U.asTypeOf(new LsPipelineBundle))
  // meta of the split stores
  val lowAddrStore = WireInit(0.U.asTypeOf(new LsPipelineBundle))
  val highAddrStore = WireInit(0.U.asTypeOf(new LsPipelineBundle))
  // final lowResult = Cat(`lowResultWidth` bytes of the store data, 0.U) padded to VLEN
  val lowResultWidth = RegInit(0.U(3.W)) // how many bytes of the store data go to the low access
  // final highResult = zero-extension to VLEN of `highResultWidth` bytes of (store data >> lowResultWidth)
  val highResultWidth = RegInit(0.U(3.W)) // how many bytes of the store data go to the high access
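  // Example: an SD whose vaddr has offset 0b101 within its 8-byte word is split into a low
  // SD (vaddr - 5) that takes the lowest 3 bytes of the store data (lowResultWidth = BYTE3)
  // and a high SD (vaddr + 3) that takes the remaining 5 bytes (highResultWidth = BYTE5).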
  when (bufferState === s_split) {
    when (!cross16BytesBoundary) {
      assert(false.B, s"There should be no non-aligned access that does not cross 16Byte boundaries.")
    } .otherwise {
      // split this unaligned store into `maxSplitNum` aligned stores
      unWriteStores := Fill(maxSplitNum, 1.U(1.W))
      unSentStores := Fill(maxSplitNum, 1.U(1.W))
      curPtr := 0.U
      lowAddrStore.uop := req.uop
      lowAddrStore.uop.exceptionVec(storeAddrMisaligned) := false.B
      highAddrStore.uop := req.uop
      highAddrStore.uop.exceptionVec(storeAddrMisaligned) := false.B

      switch (alignedType(1, 0)) {
        is (SB) {
          assert(false.B, "sb should not trigger misalign")
        }

        is (SH) {
          lowAddrStore.uop.fuOpType := SB
          lowAddrStore.vaddr := req.vaddr
          lowAddrStore.mask := 0x1.U << lowAddrStore.vaddr(3, 0)
          lowResultWidth := BYTE1

          highAddrStore.uop.fuOpType := SB
          highAddrStore.vaddr := req.vaddr + 1.U
          highAddrStore.mask := 0x1.U << highAddrStore.vaddr(3, 0)
          highResultWidth := BYTE1
        }

        is (SW) {
          switch (req.vaddr(1, 0)) {
            is ("b00".U) {
              assert(false.B, "should not trigger misalign")
            }

            is ("b01".U) {
              lowAddrStore.uop.fuOpType := SW
              lowAddrStore.vaddr := req.vaddr - 1.U
              lowAddrStore.mask := 0xf.U << lowAddrStore.vaddr(3, 0)
              lowResultWidth := BYTE3

              highAddrStore.uop.fuOpType := SB
              highAddrStore.vaddr := req.vaddr + 3.U
              highAddrStore.mask := 0x1.U << highAddrStore.vaddr(3, 0)
              highResultWidth := BYTE1
            }

            is ("b10".U) {
              lowAddrStore.uop.fuOpType := SH
              lowAddrStore.vaddr := req.vaddr
              lowAddrStore.mask := 0x3.U << lowAddrStore.vaddr(3, 0)
              lowResultWidth := BYTE2

              highAddrStore.uop.fuOpType := SH
              highAddrStore.vaddr := req.vaddr + 2.U
              highAddrStore.mask := 0x3.U << highAddrStore.vaddr(3, 0)
              highResultWidth := BYTE2
            }

            is ("b11".U) {
              lowAddrStore.uop.fuOpType := SB
              lowAddrStore.vaddr := req.vaddr
              lowAddrStore.mask := 0x1.U << lowAddrStore.vaddr(3, 0)
              lowResultWidth := BYTE1

              highAddrStore.uop.fuOpType := SW
              highAddrStore.vaddr := req.vaddr + 1.U
              highAddrStore.mask := 0xf.U << highAddrStore.vaddr(3, 0)
              highResultWidth := BYTE3
            }
          }
        }

        is (SD) {
          switch (req.vaddr(2, 0)) {
            is ("b000".U) {
              assert(false.B, "should not trigger misalign")
            }

            is ("b001".U) {
              lowAddrStore.uop.fuOpType := SD
              lowAddrStore.vaddr := req.vaddr - 1.U
              lowAddrStore.mask := 0xff.U << lowAddrStore.vaddr(3, 0)
              lowResultWidth := BYTE7

              highAddrStore.uop.fuOpType := SB
              highAddrStore.vaddr := req.vaddr + 7.U
              highAddrStore.mask := 0x1.U << highAddrStore.vaddr(3, 0)
              highResultWidth := BYTE1
            }

            is ("b010".U) {
              lowAddrStore.uop.fuOpType := SD
              lowAddrStore.vaddr := req.vaddr - 2.U
              lowAddrStore.mask := 0xff.U << lowAddrStore.vaddr(3, 0)
              lowResultWidth := BYTE6

              highAddrStore.uop.fuOpType := SH
              highAddrStore.vaddr := req.vaddr + 6.U
              highAddrStore.mask := 0x3.U << highAddrStore.vaddr(3, 0)
              highResultWidth := BYTE2
            }

            is ("b011".U) {
              lowAddrStore.uop.fuOpType := SD
              lowAddrStore.vaddr := req.vaddr - 3.U
              lowAddrStore.mask := 0xff.U << lowAddrStore.vaddr(3, 0)
              lowResultWidth := BYTE5

              highAddrStore.uop.fuOpType := SW
              highAddrStore.vaddr := req.vaddr + 5.U
              highAddrStore.mask := 0xf.U << highAddrStore.vaddr(3, 0)
              highResultWidth := BYTE3
            }

            is ("b100".U) {
              lowAddrStore.uop.fuOpType := SW
              lowAddrStore.vaddr := req.vaddr
              lowAddrStore.mask := 0xf.U << lowAddrStore.vaddr(3, 0)
              lowResultWidth := BYTE4

              highAddrStore.uop.fuOpType := SW
              highAddrStore.vaddr := req.vaddr + 4.U
              highAddrStore.mask := 0xf.U << highAddrStore.vaddr(3, 0)
              highResultWidth := BYTE4
            }

            is ("b101".U) {
              lowAddrStore.uop.fuOpType := SD
              lowAddrStore.vaddr := req.vaddr - 5.U
              lowAddrStore.mask := 0xff.U << lowAddrStore.vaddr(3, 0)
              lowResultWidth := BYTE3

              highAddrStore.uop.fuOpType := SD
              highAddrStore.vaddr := req.vaddr + 3.U
              highAddrStore.mask := 0xff.U << highAddrStore.vaddr(3, 0)
              highResultWidth := BYTE5
            }

            is ("b110".U) {
              lowAddrStore.uop.fuOpType := SD
              lowAddrStore.vaddr := req.vaddr - 6.U
              lowAddrStore.mask := 0xff.U << lowAddrStore.vaddr(3, 0)
              lowResultWidth := BYTE2

              highAddrStore.uop.fuOpType := SD
              highAddrStore.vaddr := req.vaddr + 2.U
              highAddrStore.mask := 0xff.U << highAddrStore.vaddr(3, 0)
              highResultWidth := BYTE6
            }

            is ("b111".U) {
              lowAddrStore.uop.fuOpType := SD
              lowAddrStore.vaddr := req.vaddr - 7.U
              lowAddrStore.mask := 0xff.U << lowAddrStore.vaddr(3, 0)
              lowResultWidth := BYTE1

              highAddrStore.uop.fuOpType := SD
              highAddrStore.vaddr := req.vaddr + 1.U
              highAddrStore.mask := 0xff.U << highAddrStore.vaddr(3, 0)
              highResultWidth := BYTE7
            }
          }
        }
      }

      splitStoreReqs(0) := lowAddrStore
      splitStoreReqs(1) := highAddrStore
    }
  }
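  // The split requests are issued one at a time: curPtr selects the low-address request first
  // and then the high-address one; isFinalSplit marks the last (high) split request.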
  io.splitStoreReq.valid := req_valid && (bufferState === s_req)
  io.splitStoreReq.bits := splitStoreReqs(curPtr)
  io.splitStoreReq.bits.isvec := req.isvec
  // Restore the H-extension (hypervisor) store information in fuOpType
  // bit encoding: | hsv (1 bit) | 00 (2 bits) | size (2 bits) |
  val reqIsHsv = LSUOpType.isHsv(req.uop.fuOpType)
  io.splitStoreReq.bits.uop.fuOpType := Mux(req.isvec, req.uop.fuOpType, Cat(reqIsHsv, 0.U(2.W), splitStoreReqs(curPtr).uop.fuOpType(1, 0)))
  io.splitStoreReq.bits.alignedType := Mux(req.isvec, splitStoreReqs(curPtr).uop.fuOpType(1, 0), req.alignedType)
  io.splitStoreReq.bits.isFinalSplit := curPtr(0)

  when (io.splitStoreResp.valid) {
    val resp = io.splitStoreResp.bits
    splitStoreResp(curPtr) := io.splitStoreResp.bits
    when (isUncache) {
      unWriteStores := 0.U
      unSentStores := 0.U
      exceptionVec := ExceptionNO.selectByFu(0.U.asTypeOf(exceptionVec.cloneType), StaCfg)
      // delegate to software
      exceptionVec(storeAddrMisaligned) := true.B
    } .elsewhen (hasException) {
      unWriteStores := 0.U
      unSentStores := 0.U
      StaCfg.exceptionOut.map(no => exceptionVec(no) := exceptionVec(no) || resp.uop.exceptionVec(no))
    } .elsewhen (!io.splitStoreResp.bits.need_rep) {
      unSentStores := unSentStores & (~UIntToOH(curPtr)).asUInt
      curPtr := curPtr + 1.U
      exceptionVec := 0.U.asTypeOf(ExceptionVec())
    }
  }
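  // wmaskLow / wmaskHigh mark which bytes of the store data belong to the low and the high
  // split access: the lowest lowResultWidth (resp. highResultWidth) byte lanes are set.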
  val splitStoreData = RegInit(VecInit(List.fill(maxSplitNum)(0.U.asTypeOf(new XSBundle {
    val wdata = UInt(VLEN.W)
    val wmask = UInt((VLEN / 8).W)
  }))))

  val wmaskLow = Wire(Vec(VLEN / 8, Bool()))
  val wmaskHigh = Wire(Vec(VLEN / 8, Bool()))
  (0 until (VLEN / 8)).map {
    case i => {
      when (i.U < highResultWidth) {
        wmaskHigh(i) := true.B
      } .otherwise {
        wmaskHigh(i) := false.B
      }
      when (i.U < lowResultWidth) {
        wmaskLow(i) := true.B
      } .otherwise {
        wmaskLow(i) := false.B
      }
    }
  }

  io.writeBack.valid := req_valid && (bufferState === s_wb) && !io.storeOutValid && !req.isvec
  io.writeBack.bits.uop := req.uop
  io.writeBack.bits.uop.exceptionVec := DontCare
  StaCfg.exceptionOut.map(no => io.writeBack.bits.uop.exceptionVec(no) := (globalUncache || globalException) && exceptionVec(no))
  io.writeBack.bits.uop.flushPipe := needFlushPipe
  io.writeBack.bits.uop.replayInst := false.B
  io.writeBack.bits.data := DontCare
  io.writeBack.bits.isFromLoadUnit := DontCare
  io.writeBack.bits.debug.isMMIO := globalMMIO
  io.writeBack.bits.debug.isNC := globalNC
  io.writeBack.bits.debug.isPerfCnt := false.B
  io.writeBack.bits.debug.paddr := req.paddr
  io.writeBack.bits.debug.vaddr := req.vaddr

  io.vecWriteBack.zipWithIndex.map{
    case (wb, index) => {
      wb.valid := req_valid && (bufferState === s_wb) && req.isvec && !io.storeVecOutValid && UIntToOH(req.portIndex)(index)

      wb.bits.mBIndex := req.mbIndex
      wb.bits.hit := true.B
      wb.bits.isvec := true.B
      wb.bits.sourceType := RSFeedbackType.tlbMiss
      wb.bits.flushState := DontCare
      wb.bits.trigger := TriggerAction.None
      wb.bits.mmio := globalMMIO
      wb.bits.exceptionVec := ExceptionNO.selectByFu(exceptionVec, VstuCfg)
      wb.bits.hasException := globalException
      wb.bits.usSecondInv := req.usSecondInv
      wb.bits.vecFeedback := true.B
      wb.bits.elemIdx := req.elemIdx
      wb.bits.alignedType := req.alignedType
      wb.bits.mask := req.mask
      wb.bits.vaddr := req.vaddr
      wb.bits.vaNeedExt := req.vaNeedExt
      wb.bits.gpaddr := req.gpaddr
      wb.bits.isForVSnonLeafPTE := req.isForVSnonLeafPTE
      wb.bits.vstart := req.uop.vpu.vstart
      wb.bits.vecTriggerMask := 0.U
      wb.bits.nc := globalNC
    }
  }
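  // Redirect handling: flushing the buffered uop resets the buffer. req_valid is kept only when
  // the entry has just been taken over by an older cross-4KB-page enqueue that is not itself
  // being redirected (cross4KBPageEnq && !reqRedirect).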
  val flush = req_valid && req.uop.robIdx.needFlush(io.redirect)

  when (flush) {
    bufferState := s_idle
    req_valid := Mux(cross4KBPageEnq && cross4KBPageBoundary && !reqRedirect, req_valid, false.B)
    curPtr := 0.U
    unSentStores := 0.U
    unWriteStores := 0.U
    globalException := false.B
    globalUncache := false.B
    isCrossPage := false.B
    needFlushPipe := false.B

    globalMMIO := false.B
    globalNC := false.B
  }

  // NOTE: special case (a misaligned store crosses a page and the page fault happens in the next page)
  // if the exception happens in the higher page-address part, overwrite the storeExceptionBuffer vaddr
  val shouldOverwrite = req_valid && cross16BytesBoundary && globalException && (curPtr === 1.U)
  val overwriteExpBuf = GatedValidRegNext(shouldOverwrite)
  val overwriteVaddr = RegEnable(splitStoreResp(curPtr).vaddr, shouldOverwrite)
  val overwriteIsHyper = RegEnable(splitStoreResp(curPtr).isHyper, shouldOverwrite)
  val overwriteGpaddr = RegEnable(splitStoreResp(curPtr).gpaddr, shouldOverwrite)
  val overwriteIsForVSnonLeafPTE = RegEnable(splitStoreResp(curPtr).isForVSnonLeafPTE, shouldOverwrite)

  // TODO: in theory there is no need to overwrite; the signal is kept in this form for now
  // and will be removed after sufficient verification.
  io.overwriteExpBuf.valid := false.B
  io.overwriteExpBuf.vaddr := overwriteVaddr
  io.overwriteExpBuf.isHyper := overwriteIsHyper
  io.overwriteExpBuf.gpaddr := overwriteGpaddr
  io.overwriteExpBuf.isForVSnonLeafPTE := overwriteIsForVSnonLeafPTE

  XSPerfAccumulate("alloc", RegNext(!req_valid) && req_valid)
  XSPerfAccumulate("flush", flush)
  XSPerfAccumulate("flush_idle", flush && (bufferState === s_idle))
  XSPerfAccumulate("flush_non_idle", flush && (bufferState =/= s_idle))
}