/***************************************************************************************
 * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
 * Copyright (c) 2020-2021 Peng Cheng Laboratory
 *
 * XiangShan is licensed under Mulan PSL v2.
 * You can use this software according to the terms and conditions of the Mulan PSL v2.
 * You may obtain a copy of Mulan PSL v2 at:
 *          http://license.coscl.org.cn/MulanPSL2
 *
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 *
 * See the Mulan PSL v2 for more details.
 ***************************************************************************************/

package xiangshan.mem

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import utils._
import utility._
import xiangshan._
import xiangshan.backend.rob.RobPtr
import xiangshan.backend.Bundles._
import xiangshan.mem._
import xiangshan.backend.fu.FuType
import freechips.rocketchip.diplomacy.BufferParams
import xiangshan.cache.mmu._
import xiangshan.cache._
import xiangshan.cache.wpu.ReplayCarry
import xiangshan.backend.fu.util.SdtrigExt
import xiangshan.ExceptionNO._
import xiangshan.backend.fu.vector.Bundles.VConfig
import xiangshan.backend.datapath.NewPipelineConnect
import xiangshan.backend.fu.NewCSR._
import xiangshan.backend.fu.vector.Utils.VecDataToMaskDataVec

class VSegmentBundle(implicit p: Parameters) extends VLSUBundle
{
  val baseVaddr = UInt(XLEN.W)
  val uop = new DynInst
  val paddr = UInt(PAddrBits.W)
  val mask = UInt(VLEN.W)
  val alignedType = UInt(alignTypeBits.W)
  val vl = UInt(elemIdxBits.W)
  val uopFlowNum = UInt(elemIdxBits.W)
  val uopFlowNumMask = UInt(elemIdxBits.W)
  // for exception
  val vstart = UInt(elemIdxBits.W)
  val exceptionVaddr = UInt(XLEN.W)
  val exceptionGpaddr = UInt(XLEN.W)
  val exceptionIsForVSnonLeafPTE = Bool()
  val exception_va = Bool()
  val exception_gpa = Bool()
  val exception_pa = Bool()
  val exceptionVstart = UInt(elemIdxBits.W)
  val exceptionVl = UInt(elemIdxBits.W)
  val isFof = Bool()
}

// latch each uop's vecWen, pdest, v0Wen, uopIdx
class VSegmentUop(implicit p: Parameters) extends VLSUBundle {
  val uop = new DynInst
}

class VSegmentUnit (implicit p: Parameters) extends VLSUModule
  with HasDCacheParameters
  with MemoryOpConstants
  with SdtrigExt
  with HasLoadHelper
{
  val io = IO(new VSegmentUnitIO)

  val maxSize = VSegmentBufferSize

  class VSegUPtr(implicit p: Parameters) extends CircularQueuePtr[VSegUPtr](maxSize) {
  }

  object VSegUPtr {
    def apply(f: Bool, v: UInt)(implicit p: Parameters): VSegUPtr = {
      val ptr = Wire(new VSegUPtr)
      ptr.flag := f
      ptr.value := v
      ptr
    }
  }


  /**
  ********************************************************************************************************
  * An example to illustrate the working logic of the segment unit:
  *   For:
  *     lmul=2 sew=32 emul=2 eew=32 vl=8
  *   Then:
  *     Access memory in the order:
  *       (V2,S0),(V4,S0),(V6,S0),(V8,S0),
  *       (V2,S1),(V4,S1),(V6,S1),(V8,S1),
  *       (V2,S2),(V4,S2),(V6,S2),(V8,S2),
  *       (V2,S3),(V4,S3),(V6,S3),(V8,S3),
  *       (V3,S4),(V5,S4),(V7,S4),(V9,S4),
  *       (V3,S5),(V5,S5),(V7,S5),(V9,S5),
  *       (V3,S6),(V5,S6),(V7,S6),(V9,S6),
  *       (V3,S7),(V5,S7),(V7,S7),(V9,S7),
  *
  *
  * [[data]] saves the data generated by the accesses; each entry corresponds to one destination register.
  * [[splitPtr]] selects the destination register (buffer entry) being written.
  *
  * The splitPtr offset can be seen in the assignment logic of [[splitPtrNext]],
  * which is calculated mainly from [[fieldIdx]] and [[segmentIdx]]:
  * different fields of the same segment are accessed first, and then the next segment is visited.
  * For 'emul' greater than 1, as in this example, although 'v2' and 'v3' are different vd registers
  * of the same field, they hold different segments, so they are still accessed sequentially,
  * just like the 'Access memory in the order' listing above.
  *
  *                            [[segmentIdx]]
  *                                  |
  *                                  |
  *                                  V
  *
  *                                 S0           S1           S2           S3
  *                      --------------------------------------------------------
  *  [[splitPtr]]--> v2  |   field0   |   field0   |   field0   |   field0   |
  *                      --------------------------------------------------------
  *                                 S4           S5           S6           S7
  *                      --------------------------------------------------------
  *                  v3  |   field0   |   field0   |   field0   |   field0   |
  *                      --------------------------------------------------------
  *                                 S0           S1           S2           S3
  *                      --------------------------------------------------------
  *                  v4  |   field1   |   field1   |   field1   |   field1   |
  *                      --------------------------------------------------------
  *                                 S4           S5           S6           S7
  *                      --------------------------------------------------------
  *                  v5  |   field1   |   field1   |   field1   |   field1   |
  *                      --------------------------------------------------------
  *                                 S0           S1           S2           S3
  *                      --------------------------------------------------------
  *                  v6  |   field2   |   field2   |   field2   |   field2   |
  *                      --------------------------------------------------------
  *                                 S4           S5           S6           S7
  *                      --------------------------------------------------------
  *                  v7  |   field2   |   field2   |   field2   |   field2   |
  *                      --------------------------------------------------------
  *                                 S0           S1           S2           S3
  *                      --------------------------------------------------------
  *                  v8  |   field3   |   field3   |   field3   |   field3   |
  *                      --------------------------------------------------------
  *                                 S4           S5           S6           S7
  *                      --------------------------------------------------------
  *                  v9  |   field3   |   field3   |   field3   |   field3   |
  *                      --------------------------------------------------------
  ********************************************************************************************************
  **/


  // buffer uop
  val instMicroOp = Reg(new VSegmentBundle)
  val instMicroOpValid = RegInit(false.B)
  val data = Reg(Vec(maxSize, UInt(VLEN.W)))
  val uopq = Reg(Vec(maxSize, new VSegmentUop))
  val stride = Reg(Vec(maxSize, UInt(VLEN.W)))
  val allocated = RegInit(VecInit(Seq.fill(maxSize)(false.B)))
  val enqPtr = RegInit(0.U.asTypeOf(new VSegUPtr))
  val deqPtr = RegInit(0.U.asTypeOf(new VSegUPtr))
  val stridePtr = WireInit(0.U.asTypeOf(new VSegUPtr)) // for selecting stride/index

  val segmentIdx = RegInit(0.U(elemIdxBits.W))
  val fieldIdx = RegInit(0.U(fieldBits.W))
  val segmentOffset = RegInit(0.U(XLEN.W))
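  // For the example in the header comment (assuming VLEN = 128 and that the vd uops are enqueued
  // in register order v2, v3, ..., v9): with eew = 32 each 'data' entry holds VLEN/eew = 4
  // elements, so segments S0..S3 of field0 land in lanes 0..3 of entry deqPtr + 0 (v2) and
  // segments S4..S7 land in lanes 0..3 of entry deqPtr + 1 (v3), the lane being segmentIdx
  // masked down to the entry's element count (see elemIdxInVd below).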
  val splitPtr = RegInit(0.U.asTypeOf(new VSegUPtr)) // for selecting load/store data
  val splitPtrNext = WireInit(0.U.asTypeOf(new VSegUPtr))

  val exception_va = WireInit(false.B)
  val exception_gpa = WireInit(false.B)
  val exception_pa = WireInit(false.B)

  val maxSegIdx = instMicroOp.vl - 1.U
  val maxNfields = instMicroOp.uop.vpu.nf
  val latchVaddr = RegInit(0.U(VAddrBits.W))

  XSError((segmentIdx > maxSegIdx) && instMicroOpValid, s"segmentIdx > vl, something is wrong!\n")
  XSError((fieldIdx > maxNfields) && instMicroOpValid, s"fieldIdx > nfields, something is wrong!\n")

  // MicroOp
  val baseVaddr = instMicroOp.baseVaddr
  val alignedType = instMicroOp.alignedType
  val fuType = instMicroOp.uop.fuType
  val mask = instMicroOp.mask
  val exceptionVec = instMicroOp.uop.exceptionVec
  val issueEew = instMicroOp.uop.vpu.veew
  val issueLmul = instMicroOp.uop.vpu.vtype.vlmul
  val issueSew = instMicroOp.uop.vpu.vtype.vsew
  val issueEmul = EewLog2(issueEew) - issueSew + issueLmul
  val elemIdxInVd = segmentIdx & instMicroOp.uopFlowNumMask
  val issueInstType = Cat(true.B, instMicroOp.uop.fuOpType(6, 5)) // always a segment instruction
  val issueUopFlowNumLog2 = GenRealFlowLog2(issueInstType, issueEmul, issueLmul, issueEew, issueSew, true) // log2 of the max number of elements in one vd
  val issueVlMax = instMicroOp.uopFlowNum // max number of elements in one vd
  val issueMaxIdxInIndex = GenVLMAX(Mux(issueEmul.asSInt > 0.S, 0.U, issueEmul), issueEew(1, 0)) // max number of index elements in one index register
  val issueMaxIdxInIndexMask = GenVlMaxMask(issueMaxIdxInIndex, elemIdxBits)
  val issueMaxIdxInIndexLog2 = GenVLMAXLog2(Mux(issueEmul.asSInt > 0.S, 0.U, issueEmul), issueEew(1, 0))
  val issueIndexIdx = segmentIdx & issueMaxIdxInIndexMask
  val segmentActive = (mask & UIntToOH(segmentIdx)).orR

  // sbuffer write interface
  val sbufferOut = Wire(Decoupled(new DCacheWordReqWithVaddrAndPfFlag))

  // Segment instruction FSM
  /*
  * s_idle: wait for a request
  * s_flush_sbuffer_req: flush the sbuffer
  * s_wait_flush_sbuffer_resp: wait until the sbuffer is empty
  * s_tlb_req: send the tlb request
  * s_wait_tlb_resp: wait for the tlb response
  * s_pm: check pmp
  * s_cache_req: send the dcache request
  * s_cache_resp: wait for the dcache response
  * s_latch_and_merge_data: latch and merge load data
  * s_send_data: send store data
  * s_wait_to_sbuffer: wait for data in the sbufferOut pipeline register to be sent to the sbuffer
  * s_finish: write back uops
  * */
  val s_idle :: s_flush_sbuffer_req :: s_wait_flush_sbuffer_resp :: s_tlb_req :: s_wait_tlb_resp :: s_pm :: s_cache_req :: s_cache_resp :: s_latch_and_merge_data :: s_send_data :: s_wait_to_sbuffer :: s_finish :: Nil = Enum(12)
  val state = RegInit(s_idle)
  val stateNext = WireInit(s_idle)
  val sbufferEmpty = io.flush_sbuffer.empty
  val isVSegLoad = FuType.isVSegLoad(instMicroOp.uop.fuType)

  /**
   * state update
   */
  state := stateNext

  /**
   * state transfer
   */
  when(state === s_idle){
    stateNext := Mux(isAfter(enqPtr, deqPtr), s_flush_sbuffer_req, s_idle)
  }.elsewhen(state === s_flush_sbuffer_req){
    stateNext := Mux(sbufferEmpty, s_tlb_req, s_wait_flush_sbuffer_resp) // if the sbuffer is already empty, query the tlb directly

  }.elsewhen(state === s_wait_flush_sbuffer_resp){
    stateNext := Mux(sbufferEmpty, s_tlb_req, s_wait_flush_sbuffer_resp)

  }.elsewhen(state === s_tlb_req){
    stateNext := Mux(segmentActive, s_wait_tlb_resp, Mux(isVSegLoad, s_latch_and_merge_data, s_send_data))

  }.elsewhen(state === s_wait_tlb_resp){
    stateNext := Mux(io.dtlb.resp.fire,
                     Mux(!io.dtlb.resp.bits.miss,
                         s_pm,
                         s_tlb_req),
                     s_wait_tlb_resp)

  }.elsewhen(state === s_pm){
    /* a vector store sends its data to the sbuffer, so it does not need to query the dcache */
    stateNext := Mux(exception_pa || exception_va || exception_gpa,
                     s_finish,
                     Mux(isVSegLoad, s_cache_req, s_send_data))

  }.elsewhen(state === s_cache_req){
    stateNext := Mux(io.rdcache.req.fire, s_cache_resp, s_cache_req)

  }.elsewhen(state === s_cache_resp){
    when(io.rdcache.resp.fire) {
      when(io.rdcache.resp.bits.miss || io.rdcache.s2_bank_conflict) {
        stateNext := s_cache_req
      }.otherwise {
        stateNext := Mux(isVSegLoad, s_latch_and_merge_data, s_send_data)
      }
    }.otherwise{
      stateNext := s_cache_resp
    }
  /* if the segment is inactive, there is no need to access all of its fields */
  }.elsewhen(state === s_latch_and_merge_data) {
    when((segmentIdx === maxSegIdx) && (fieldIdx === maxNfields) ||
         ((segmentIdx === maxSegIdx) && !segmentActive)) {

      stateNext := s_finish // segment instruction finished
    }.otherwise {
      stateNext := s_tlb_req // need to continue
    }
  /* if the segment is inactive, there is no need to access all of its fields */
  }.elsewhen(state === s_send_data) { // wait for the sbuffer to accept data
    when(!sbufferOut.fire && segmentActive) {
      stateNext := s_send_data
    }.elsewhen(segmentIdx === maxSegIdx && (fieldIdx === maxNfields && sbufferOut.fire || !segmentActive && io.sbuffer.valid && !io.sbuffer.ready)) {
      stateNext := s_wait_to_sbuffer
    }.elsewhen(segmentIdx === maxSegIdx && !segmentActive){
      stateNext := s_finish // segment instruction finished
    }.otherwise {
      stateNext := s_tlb_req // need to continue
    }

  }.elsewhen(state === s_wait_to_sbuffer){
    stateNext := Mux(io.sbuffer.fire, s_finish, s_wait_to_sbuffer)

  }.elsewhen(state === s_finish){ // write back uops
    stateNext := Mux(distanceBetween(enqPtr, deqPtr) === 0.U, s_idle, s_finish)

  }.otherwise{
    stateNext := s_idle
    XSError(true.B, s"Unknown state!\n")
  }

  /*************************************************************************
   *                          enqueue logic
   *************************************************************************/
  io.in.ready := true.B
  val fuOpType = io.in.bits.uop.fuOpType
  val vtype = io.in.bits.uop.vpu.vtype
  val mop = fuOpType(6, 5)
  val instType = Cat(true.B, mop)
  val eew = io.in.bits.uop.vpu.veew
  val sew = vtype.vsew
  val lmul = vtype.vlmul
  val emul = EewLog2(eew) - sew + lmul
  val vl = instMicroOp.vl
  val vm = instMicroOp.uop.vpu.vm
  val vstart = instMicroOp.uop.vpu.vstart
  val srcMask = GenFlowMask(Mux(vm, Fill(VLEN, 1.U(1.W)), io.in.bits.src_mask), vstart, vl, true)
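  // The 'emul' computed above is the RVV formula EMUL = (EEW / SEW) * LMUL evaluated in signed
  // log2 form: log2(EMUL) = log2(EEW) - log2(SEW) + log2(LMUL). For example, eew=16, sew=32 and
  // lmul=2 give 1 - 2 + 1 = 0, i.e. EMUL = 1.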
  // on the first uop enqueue, latch the microOp of the segment instruction
  when(io.in.fire && !instMicroOpValid){
    // number of elements in one vd
    // TODO: rewrite it in a more elegant way.
    val uopFlowNum = ZeroExt(GenRealFlowNum(instType, emul, lmul, eew, sew, true), elemIdxBits)
    instMicroOp.baseVaddr := io.in.bits.src_rs1
    instMicroOpValid := true.B // set on the first uop
    instMicroOp.alignedType := Mux(isIndexed(instType), sew(1, 0), eew)
    instMicroOp.uop := io.in.bits.uop
    instMicroOp.mask := srcMask
    instMicroOp.vstart := 0.U
    instMicroOp.uopFlowNum := uopFlowNum
    instMicroOp.uopFlowNumMask := GenVlMaxMask(uopFlowNum, elemIdxBits) // for merging data
    instMicroOp.vl := io.in.bits.src_vl.asTypeOf(VConfig()).vl
    segmentOffset := 0.U
    instMicroOp.isFof := (fuOpType === VlduType.vleff) && FuType.isVLoad(fuType)
  }
  // latch data
  when(io.in.fire){
    data(enqPtr.value) := io.in.bits.src_vs3
    stride(enqPtr.value) := io.in.bits.src_stride
    uopq(enqPtr.value).uop := io.in.bits.uop
  }

  // update enqPtr; there is only one enqueue port
  when(io.in.fire){
    enqPtr := enqPtr + 1.U
  }

  /*************************************************************************
   *                          output logic
   *************************************************************************/

  val indexStride = IndexAddr( // index for indexed instructions
    index = stride(stridePtr.value),
    flow_inner_idx = issueIndexIdx,
    eew = issueEew
  )
  val realSegmentOffset = Mux(isIndexed(issueInstType),
    indexStride,
    segmentOffset)
  val vaddr = baseVaddr + (fieldIdx << alignedType).asUInt + realSegmentOffset

  // latch vaddr
  when(state === s_tlb_req){
    latchVaddr := vaddr(VAddrBits - 1, 0)
  }
  /**
   * tlb req and tlb resp
   */

  // query DTLB IO assignment
  io.dtlb.req := DontCare
  io.dtlb.resp.ready := true.B
  io.dtlb.req.valid := state === s_tlb_req && segmentActive
  io.dtlb.req.bits.cmd := Mux(FuType.isVLoad(fuType), TlbCmd.read, TlbCmd.write)
  io.dtlb.req.bits.vaddr := vaddr(VAddrBits - 1, 0)
  io.dtlb.req.bits.fullva := vaddr
  io.dtlb.req.bits.checkfullva := true.B
  io.dtlb.req.bits.size := instMicroOp.alignedType(2,0)
  io.dtlb.req.bits.memidx.is_ld := FuType.isVLoad(fuType)
  io.dtlb.req.bits.memidx.is_st := FuType.isVStore(fuType)
  io.dtlb.req.bits.debug.robIdx := instMicroOp.uop.robIdx
  io.dtlb.req.bits.no_translate := false.B
  io.dtlb.req.bits.debug.pc := instMicroOp.uop.pc
  io.dtlb.req.bits.debug.isFirstIssue := DontCare
  io.dtlb.req_kill := false.B

  val canTriggerException = segmentIdx === 0.U || !instMicroOp.isFof // only element 0, or a non-fault-only-first access, can trigger an exception

  val segmentTrigger = Module(new VSegmentTrigger)
  segmentTrigger.io.fromCsrTrigger.tdataVec := io.fromCsrTrigger.tdataVec
  segmentTrigger.io.fromCsrTrigger.tEnableVec := io.fromCsrTrigger.tEnableVec
  segmentTrigger.io.fromCsrTrigger.triggerCanRaiseBpExp := io.fromCsrTrigger.triggerCanRaiseBpExp
  segmentTrigger.io.fromCsrTrigger.debugMode := io.fromCsrTrigger.debugMode
  segmentTrigger.io.memType := isVSegLoad
  segmentTrigger.io.fromLoadStore.vaddr := latchVaddr
  segmentTrigger.io.fromLoadStore.isVectorUnitStride := false.B
  segmentTrigger.io.fromLoadStore.mask := 0.U

  val triggerAction = segmentTrigger.io.toLoadStore.triggerAction
  val triggerDebugMode = TriggerAction.isDmode(triggerAction)
  val triggerBreakpoint = TriggerAction.isExp(triggerAction)

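  // Note on fault-only-first (vleff): only element 0 is allowed to raise an exception; a fault on
  // a later element must instead shorten vl. That is why the exceptionVec updates below are gated
  // by canTriggerException, while exceptionVl still records the faulting element index so it can
  // be passed on through io.exceptionInfo.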
  // tlb resp
  when(io.dtlb.resp.fire && state === s_wait_tlb_resp){
    exceptionVec(storePageFault) := io.dtlb.resp.bits.excp(0).pf.st && canTriggerException
    exceptionVec(loadPageFault) := io.dtlb.resp.bits.excp(0).pf.ld && canTriggerException
    exceptionVec(storeGuestPageFault) := io.dtlb.resp.bits.excp(0).gpf.st && canTriggerException
    exceptionVec(loadGuestPageFault) := io.dtlb.resp.bits.excp(0).gpf.ld && canTriggerException
    exceptionVec(storeAccessFault) := io.dtlb.resp.bits.excp(0).af.st && canTriggerException
    exceptionVec(loadAccessFault) := io.dtlb.resp.bits.excp(0).af.ld && canTriggerException
    when(!io.dtlb.resp.bits.miss){
      instMicroOp.paddr := io.dtlb.resp.bits.paddr(0)
      instMicroOp.exceptionGpaddr := io.dtlb.resp.bits.gpaddr(0)
      instMicroOp.exceptionIsForVSnonLeafPTE := io.dtlb.resp.bits.isForVSnonLeafPTE
    }
  }
  // pmp
  // NOTE: only load/store exceptions are handled here; other kinds of exceptions must not be sent to this unit
  val pmp = WireInit(io.pmpResp)
  when(state === s_pm) {
    val addr_aligned = LookupTree(Mux(isIndexed(issueInstType), issueSew(1, 0), issueEew(1, 0)), List(
      "b00".U -> true.B,                 //b
      "b01".U -> (vaddr(0) === 0.U),     //h
      "b10".U -> (vaddr(1, 0) === 0.U),  //w
      "b11".U -> (vaddr(2, 0) === 0.U)   //d
    ))
    val missAligned = !addr_aligned
    exceptionVec(loadAddrMisaligned) := missAligned && FuType.isVLoad(fuType) && canTriggerException
    exceptionVec(storeAddrMisaligned) := missAligned && FuType.isVStore(fuType) && canTriggerException

    exception_va := exceptionVec(storePageFault) || exceptionVec(loadPageFault) ||
                    exceptionVec(storeAccessFault) || exceptionVec(loadAccessFault) ||
                    exceptionVec(breakPoint) || triggerDebugMode || (missAligned && canTriggerException)
    exception_gpa := exceptionVec(storeGuestPageFault) || exceptionVec(loadGuestPageFault)
    exception_pa := (pmp.st || pmp.ld || pmp.mmio) && canTriggerException

    instMicroOp.exception_pa := exception_pa
    instMicroOp.exception_va := exception_va
    instMicroOp.exception_gpa := exception_gpa
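    // The three exception classes latched above: exception_va collects faults derived from the
    // virtual address (page faults, access faults reported by the TLB, trigger breakpoints and
    // misaligned addresses), exception_gpa collects guest-page faults, and exception_pa collects
    // PMP/PMA denials. Any of them moves the FSM from s_pm directly to s_finish, and the details
    // are reported through io.exceptionInfo.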
    // update the access-fault bits; vector MMIO is currently not supported
    exceptionVec(loadAccessFault) := (exceptionVec(loadAccessFault) || pmp.ld || pmp.mmio) && canTriggerException
    exceptionVec(storeAccessFault) := (exceptionVec(storeAccessFault) || pmp.st || pmp.mmio) && canTriggerException
    exceptionVec(breakPoint) := triggerBreakpoint && canTriggerException

    when(exception_va || exception_gpa || exception_pa) {
      when(canTriggerException) {
        instMicroOp.exceptionVaddr := vaddr
        instMicroOp.exceptionVl := segmentIdx // for exception
        instMicroOp.exceptionVstart := segmentIdx // for exception
      }.otherwise {
        instMicroOp.exceptionVl := segmentIdx
      }
    }

    when(exceptionVec(breakPoint) || triggerDebugMode) {
      instMicroOp.uop.trigger := triggerAction
    }
  }

  /**
   * flush sbuffer IO assignment
   */
  io.flush_sbuffer.valid := !sbufferEmpty && (state === s_flush_sbuffer_req)


  /**
   * merge data for load
   * latchVaddr(3,0) selects the byte offset of the element within the 16-byte data word returned by the dcache
   */
  val cacheData = LookupTree(latchVaddr(3,0), List(
    "b0000".U -> io.rdcache.resp.bits.data_delayed(63, 0),
    "b0001".U -> io.rdcache.resp.bits.data_delayed(63, 8),
    "b0010".U -> io.rdcache.resp.bits.data_delayed(63, 16),
    "b0011".U -> io.rdcache.resp.bits.data_delayed(63, 24),
    "b0100".U -> io.rdcache.resp.bits.data_delayed(63, 32),
    "b0101".U -> io.rdcache.resp.bits.data_delayed(63, 40),
    "b0110".U -> io.rdcache.resp.bits.data_delayed(63, 48),
    "b0111".U -> io.rdcache.resp.bits.data_delayed(63, 56),
    "b1000".U -> io.rdcache.resp.bits.data_delayed(127, 64),
    "b1001".U -> io.rdcache.resp.bits.data_delayed(127, 72),
    "b1010".U -> io.rdcache.resp.bits.data_delayed(127, 80),
    "b1011".U -> io.rdcache.resp.bits.data_delayed(127, 88),
    "b1100".U -> io.rdcache.resp.bits.data_delayed(127, 96),
    "b1101".U -> io.rdcache.resp.bits.data_delayed(127, 104),
    "b1110".U -> io.rdcache.resp.bits.data_delayed(127, 112),
    "b1111".U -> io.rdcache.resp.bits.data_delayed(127, 120)
  ))
  val pickData = rdataVecHelper(alignedType(1,0), cacheData)
  val mergedData = mergeDataWithElemIdx(
    oldData = data(splitPtr.value),
    newData = Seq(pickData),
    alignedType = alignedType(1,0),
    elemIdx = Seq(elemIdxInVd),
    valids = Seq(true.B)
  )
  when(state === s_latch_and_merge_data && segmentActive){
    data(splitPtr.value) := mergedData
  }
  /**
   * split data for store
   * */
  val splitData = genVSData(
    data = data(splitPtr.value),
    elemIdx = elemIdxInVd,
    alignedType = alignedType
  )
  val flowData = genVWdata(splitData, alignedType) // TODO: connect vstd, pass vector data
  val wmask = genVWmask(latchVaddr, alignedType(1, 0)) & Fill(VLENB, segmentActive)

  /**
   * rdcache req; store requests do not need to query the dcache, because elements are written to the sbuffer
   */
  io.rdcache.req := DontCare
  io.rdcache.req.valid := state === s_cache_req && FuType.isVLoad(fuType)
  io.rdcache.req.bits.cmd := MemoryOpConstants.M_XRD
  io.rdcache.req.bits.vaddr := latchVaddr
  io.rdcache.req.bits.mask := mask
  io.rdcache.req.bits.data := flowData
  io.rdcache.pf_source := LOAD_SOURCE.U
  io.rdcache.req.bits.id := DontCare
  io.rdcache.resp.ready := true.B
  io.rdcache.s1_paddr_dup_lsu := instMicroOp.paddr
  io.rdcache.s1_paddr_dup_dcache := instMicroOp.paddr
  io.rdcache.s1_kill := false.B
  io.rdcache.s1_kill_data_read := false.B
  io.rdcache.s2_kill := false.B
  if (env.FPGAPlatform){
    io.rdcache.s0_pc := DontCare
    io.rdcache.s1_pc := DontCare
    io.rdcache.s2_pc := DontCare
  }else{
    io.rdcache.s0_pc := instMicroOp.uop.pc
    io.rdcache.s1_pc := instMicroOp.uop.pc
    io.rdcache.s2_pc := instMicroOp.uop.pc
  }
  io.rdcache.replacementUpdated := false.B
  io.rdcache.is128Req := false.B
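  // Store data does not go to the dcache: sbufferOut below is fed through NewPipelineConnect into
  // io.sbuffer, adding one pipeline stage, and s_wait_to_sbuffer exists to drain that stage for
  // the final field before the instruction is allowed to finish.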


  /**
   * write data to sbuffer
   * */
  sbufferOut.bits := DontCare
  sbufferOut.valid := state === s_send_data && segmentActive
  sbufferOut.bits.vecValid := state === s_send_data && segmentActive
  sbufferOut.bits.mask := wmask
  sbufferOut.bits.data := flowData
  sbufferOut.bits.vaddr := latchVaddr
  sbufferOut.bits.cmd := MemoryOpConstants.M_XWR
  sbufferOut.bits.id := DontCare
  sbufferOut.bits.addr := instMicroOp.paddr

  NewPipelineConnect(
    sbufferOut, io.sbuffer, io.sbuffer.fire,
    false.B,
    Option(s"VSegmentUnitPipelineConnect")
  )

  io.vecDifftestInfo.valid := io.sbuffer.valid
  io.vecDifftestInfo.bits := uopq(deqPtr.value).uop

  /**
   * update ptr
   * */
  private val fieldActiveWriteFinish = sbufferOut.fire && segmentActive // the write of one field has finished and the segment is active
  XSError(sbufferOut.fire && !segmentActive, "Attempted to write an inactive segment to the sbuffer, something is wrong!\n")

  private val segmentInactiveFinish = ((state === s_latch_and_merge_data) || (state === s_send_data)) && !segmentActive

  val splitPtrOffset = Mux(
    isIndexed(instType),
    Mux(lmul.asSInt < 0.S, 1.U, (1.U << lmul).asUInt),
    Mux(emul.asSInt < 0.S, 1.U, (1.U << emul).asUInt)
  )
  splitPtrNext :=
    Mux(fieldIdx === maxNfields || !segmentActive, // an active segment must finish all of its fields first; an inactive one jumps to the next segment immediately
      // segment finished: shift right by 'issueUopFlowNumLog2' so that emul > 1 still generates the correct lateral offset
      (deqPtr + ((segmentIdx +& 1.U) >> issueUopFlowNumLog2).asUInt),
      // next field
      (splitPtr + splitPtrOffset)
    )
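  // A worked instance of splitPtrNext, using the lmul=emul=2, sew=eew=32, nf=4 example from the
  // header comment and assuming VLEN = 128 (4 elements per vd, so issueUopFlowNumLog2 = 2):
  // moving from field0 to field1 inside segment S1 advances splitPtr by splitPtrOffset = EMUL = 2,
  // i.e. from v2's buffer entry to v4's; when the last field of S3 completes, splitPtrNext =
  // deqPtr + ((3 + 1) >> 2) = deqPtr + 1, which is v3's entry, matching the access order above.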

  dontTouch(issueUopFlowNumLog2)
  dontTouch(issueEmul)
  dontTouch(splitPtrNext)
  dontTouch(stridePtr)
  dontTouch(segmentActive)

  // update splitPtr
  when(state === s_latch_and_merge_data || (state === s_send_data && (fieldActiveWriteFinish || !segmentActive))){
    splitPtr := splitPtrNext
  }.elsewhen(io.in.fire && !instMicroOpValid){
    splitPtr := deqPtr // initialize splitPtr
  }

  // update stridePtr; only used by indexed accesses
  val strideOffset = Mux(isIndexed(issueInstType), segmentIdx >> issueMaxIdxInIndexLog2, 0.U)
  stridePtr := deqPtr + strideOffset

  // update fieldIdx
  when(io.in.fire && !instMicroOpValid){ // init
    fieldIdx := 0.U
  }.elsewhen(state === s_latch_and_merge_data && segmentActive ||
            (state === s_send_data && fieldActiveWriteFinish)){ // only if the segment is active

    /* advance to the next field; wrap to 0 when this segment is complete */
    fieldIdx := Mux(fieldIdx === maxNfields, 0.U, fieldIdx + 1.U)
  }.elsewhen(segmentInactiveFinish){ // the segment is inactive, go to the next segment
    fieldIdx := 0.U
  }
  // update segmentIdx
  when(io.in.fire && !instMicroOpValid){
    segmentIdx := 0.U
  }.elsewhen(fieldIdx === maxNfields && (state === s_latch_and_merge_data || (state === s_send_data && fieldActiveWriteFinish)) &&
             segmentIdx =/= maxSegIdx){ // next segment, only if the segment is active

    segmentIdx := segmentIdx + 1.U
  }.elsewhen(segmentInactiveFinish && segmentIdx =/= maxSegIdx){ // if the segment is inactive, go to the next segment
    segmentIdx := segmentIdx + 1.U
  }

  // update segmentOffset
  /* increase segmentOffset whenever a segment, active or inactive, completes;
   * unit-stride advances by the segment size ((maxNfields + 1) fields of eew bytes each), strided by the stride register */
  when((fieldIdx === maxNfields && (state === s_latch_and_merge_data || (state === s_send_data && fieldActiveWriteFinish))) ||
       segmentInactiveFinish){

    segmentOffset := segmentOffset + Mux(isUnitStride(issueInstType), (maxNfields +& 1.U) << issueEew(1, 0), stride(stridePtr.value))
  }

  // update deqPtr
  when((state === s_finish) && !isEmpty(enqPtr, deqPtr)){
    deqPtr := deqPtr + 1.U
  }

  /*************************************************************************
   *                          dequeue logic
   *************************************************************************/
  val vdIdxInField = GenUopIdxInField(Mux(isIndexed(instType), issueLmul, issueEmul), uopq(deqPtr.value).uop.vpu.vuopIdx)
  /* select the mask of the vd; may be removed in the future */
  val realEw = Mux(isIndexed(issueInstType), issueSew(1, 0), issueEew(1, 0))
  val maskDataVec: Vec[UInt] = VecDataToMaskDataVec(instMicroOp.mask, realEw)
  val maskUsed = maskDataVec(vdIdxInField)

  when(stateNext === s_idle){
    instMicroOpValid := false.B
  }
  // writeback to backend
  val writebackOut = WireInit(io.uopwriteback.bits)
  val writebackValid = (state === s_finish) && !isEmpty(enqPtr, deqPtr)
  writebackOut.uop := uopq(deqPtr.value).uop
  writebackOut.uop.vpu := instMicroOp.uop.vpu
  writebackOut.uop.exceptionVec := instMicroOp.uop.exceptionVec
  writebackOut.mask.get := instMicroOp.mask
  writebackOut.data := data(deqPtr.value)
  writebackOut.vdIdx.get := vdIdxInField
  writebackOut.uop.vpu.vl := instMicroOp.vl
  writebackOut.uop.vpu.vstart := instMicroOp.vstart
  writebackOut.uop.vpu.vmask := maskUsed
  writebackOut.uop.vpu.vuopIdx := uopq(deqPtr.value).uop.vpu.vuopIdx
  writebackOut.debug := DontCare
  writebackOut.vdIdxInField.get := vdIdxInField
  writebackOut.uop.robIdx := instMicroOp.uop.robIdx
  writebackOut.uop.fuOpType := instMicroOp.uop.fuOpType

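  // In s_finish one uop is written back per cycle: deqPtr advances while the buffer is non-empty,
  // and the FSM only returns to s_idle once enqPtr and deqPtr meet again. The outputs below are
  // registered (RegNext/RegEnable), so the backend sees each writeback one cycle after it is
  // generated here.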
  io.uopwriteback.valid := RegNext(writebackValid)
  io.uopwriteback.bits := RegEnable(writebackOut, writebackValid)

  dontTouch(writebackValid)

  // to RS
  val feedbackOut = WireInit(0.U.asTypeOf(io.feedback.bits))
  val feedbackValid = state === s_finish && !isEmpty(enqPtr, deqPtr)
  feedbackOut.hit := true.B
  feedbackOut.robIdx := instMicroOp.uop.robIdx
  feedbackOut.sourceType := DontCare
  feedbackOut.flushState := DontCare
  feedbackOut.dataInvalidSqIdx := DontCare
  feedbackOut.sqIdx := uopq(deqPtr.value).uop.sqIdx
  feedbackOut.lqIdx := uopq(deqPtr.value).uop.lqIdx

  io.feedback.valid := RegNext(feedbackValid)
  io.feedback.bits := RegEnable(feedbackOut, feedbackValid)

  dontTouch(feedbackValid)

  // exception
  io.exceptionInfo := DontCare
  io.exceptionInfo.bits.robidx := instMicroOp.uop.robIdx
  io.exceptionInfo.bits.uopidx := uopq(deqPtr.value).uop.vpu.vuopIdx
  io.exceptionInfo.bits.vstart := instMicroOp.exceptionVstart
  io.exceptionInfo.bits.vaddr := instMicroOp.exceptionVaddr
  io.exceptionInfo.bits.gpaddr := instMicroOp.exceptionGpaddr
  io.exceptionInfo.bits.isForVSnonLeafPTE := instMicroOp.exceptionIsForVSnonLeafPTE
  io.exceptionInfo.bits.vl := instMicroOp.exceptionVl
  io.exceptionInfo.valid := (state === s_finish) && instMicroOp.uop.exceptionVec.asUInt.orR && !isEmpty(enqPtr, deqPtr)
}
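// Interface summary (as used above): io.in accepts one vd uop per cycle; io.dtlb and io.pmpResp
// perform translation and permission checks; io.rdcache reads load data, while io.sbuffer (fed
// through sbufferOut) receives store data after io.flush_sbuffer has drained older stores;
// io.uopwriteback, io.feedback and io.exceptionInfo return results, RS feedback and exception
// details to the backend; io.fromCsrTrigger and io.vecDifftestInfo serve debug triggers and difftest.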