/***************************************************************************************
 * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
 * Copyright (c) 2020-2021 Peng Cheng Laboratory
 *
 * XiangShan is licensed under Mulan PSL v2.
 * You can use this software according to the terms and conditions of the Mulan PSL v2.
 * You may obtain a copy of Mulan PSL v2 at:
 *          http://license.coscl.org.cn/MulanPSL2
 *
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 *
 * See the Mulan PSL v2 for more details.
 ***************************************************************************************/

package xiangshan.mem

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import utils._
import utility._
import xiangshan._
import xiangshan.backend.rob.RobPtr
import xiangshan.backend.Bundles._
import xiangshan.mem._
import xiangshan.backend.fu.FuType
import freechips.rocketchip.diplomacy.BufferParams
import xiangshan.cache.mmu._
import xiangshan.cache._
import xiangshan.cache.wpu.ReplayCarry
import xiangshan.backend.fu.util.SdtrigExt
import xiangshan.ExceptionNO._
import xiangshan.backend.fu.vector.Bundles.VConfig

class VSegmentBundle(implicit p: Parameters) extends VLSUBundle
{
  val vaddr          = UInt(VAddrBits.W)
  val uop            = new DynInst
  val paddr          = UInt(PAddrBits.W)
  val mask           = UInt(VLEN.W)
  val valid          = Bool()
  val alignedType    = UInt(alignTypeBits.W)
  val vl             = UInt(elemIdxBits.W)
  val vlmaxInVd      = UInt(elemIdxBits.W)
  val vlmaxMaskInVd  = UInt(elemIdxBits.W)
  // for exception
  val vstart         = UInt(elemIdxBits.W)
  val exceptionvaddr = UInt(VAddrBits.W)
  val exception_va   = Bool()
  val exception_pa   = Bool()
}

class VSegmentUnit (implicit p: Parameters) extends VLSUModule
  with HasDCacheParameters
  with MemoryOpConstants
  with SdtrigExt
  with HasLoadHelper
{
  val io = IO(new VSegmentUnitIO)

  val maxSize = VSegmentBufferSize

  class VSegUPtr(implicit p: Parameters) extends CircularQueuePtr[VSegUPtr](maxSize){
  }

  object VSegUPtr {
    def apply(f: Bool, v: UInt)(implicit p: Parameters): VSegUPtr = {
      val ptr = Wire(new VSegUPtr)
      ptr.flag  := f
      ptr.value := v
      ptr
    }
  }

  // buffer uop
  val instMicroOp   = Reg(new VSegmentBundle)
  val data          = Reg(Vec(maxSize, UInt(VLEN.W)))
  val uopIdx        = Reg(Vec(maxSize, UopIdx()))
  val stride        = Reg(Vec(maxSize, UInt(VLEN.W)))
  val allocated     = RegInit(VecInit(Seq.fill(maxSize)(false.B)))
  val enqPtr        = RegInit(0.U.asTypeOf(new VSegUPtr))
  val deqPtr        = RegInit(0.U.asTypeOf(new VSegUPtr))
  val stridePtr     = WireInit(0.U.asTypeOf(new VSegUPtr)) // for selecting stride/index

  val segmentIdx    = RegInit(0.U(elemIdxBits.W))
  val fieldIdx      = RegInit(0.U(fieldBits.W))
  val segmentOffset = RegInit(0.U(VAddrBits.W))
  val splitPtr      = RegInit(0.U.asTypeOf(new VSegUPtr)) // for selecting load/store data
  val splitPtrNext  = WireInit(0.U.asTypeOf(new VSegUPtr))

  val exception_va  = WireInit(false.B)
  val exception_pa  = WireInit(false.B)

  val maxSegIdx     = instMicroOp.vl
  val maxNfields    = instMicroOp.uop.vpu.nf

  XSError(segmentIdx > maxSegIdx, s"segmentIdx > vl, this should not happen!\n")
  XSError(fieldIdx > maxNfields, s"fieldIdx > nfields, this should not happen!\n")
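
  // Illustrative traversal (comment only, not logic): for a unit-stride segment
  // load such as vlseg3e8.v with NFIELDS = 3 (nf = 2), the unit walks one field
  // element per FSM round trip, in the order
  //   (segmentIdx, fieldIdx) = (0,0) (0,1) (0,2) (1,0) (1,1) (1,2) ...
  // fieldIdx wraps after reaching maxNfields, and segmentIdx then advances.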
  // Segment instruction's FSM
  /*
   * s_idle: wait for a request
   * s_flush_sbuffer_req: flush the sbuffer
   * s_wait_flush_sbuffer_resp: wait until the sbuffer is empty
   * s_tlb_req: request translation for the current element
   * s_wait_tlb_resp: wait for the translation result
   * s_pm: check pmp and collect load/store exceptions
   * s_cache_req: send a request to the dcache
   * s_cache_resp: wait for the dcache response
   * s_latch_and_merge_data: for loads, merge the element into vd data
   * s_send_data: for stores, send the element data to the sbuffer
   * s_finish: write back the uops
   * */
  val s_idle :: s_flush_sbuffer_req :: s_wait_flush_sbuffer_resp :: s_tlb_req :: s_wait_tlb_resp :: s_pm :: s_cache_req :: s_cache_resp :: s_latch_and_merge_data :: s_send_data :: s_finish :: Nil = Enum(11)
  val state = RegInit(s_idle)
  val stateNext = WireInit(s_idle)
  val sbufferEmpty = io.flush_sbuffer.empty

  /**
   * state update
   */
  state := stateNext

  /**
   * state transfer
   */
  when(state === s_idle){
    stateNext := Mux(isAfter(enqPtr, deqPtr), s_flush_sbuffer_req, s_idle)
  }.elsewhen(state === s_flush_sbuffer_req){
    stateNext := Mux(sbufferEmpty, s_tlb_req, s_wait_flush_sbuffer_resp) // if the sbuffer is already empty, go straight to querying the tlb

  }.elsewhen(state === s_wait_flush_sbuffer_resp){
    stateNext := Mux(sbufferEmpty, s_tlb_req, s_wait_flush_sbuffer_resp)

  }.elsewhen(state === s_tlb_req){
    stateNext := s_wait_tlb_resp

  }.elsewhen(state === s_wait_tlb_resp){
    stateNext := Mux(!io.dtlb.resp.bits.miss && io.dtlb.resp.fire, s_pm, s_tlb_req)

  }.elsewhen(state === s_pm){
    stateNext := Mux(exception_pa || exception_va, s_finish, s_cache_req)

  }.elsewhen(state === s_cache_req){
    stateNext := Mux(io.wdcache.req.fire || io.rdcache.req.fire, s_cache_resp, s_cache_req)

  }.elsewhen(state === s_cache_resp){
    when(io.wdcache.resp.fire || io.rdcache.resp.fire) {
      when(io.wdcache.resp.bits.miss && io.rdcache.resp.bits.miss) {
        stateNext := s_cache_req
      }.otherwise {
        stateNext := Mux(FuType.isVLoad(instMicroOp.uop.fuType), s_latch_and_merge_data, s_send_data)
      }
    }.otherwise{
      stateNext := s_cache_resp
    }

  }.elsewhen(state === s_latch_and_merge_data) {
    when((segmentIdx === maxSegIdx) && (fieldIdx === maxNfields)) {
      stateNext := s_finish // segment instruction finishes
    }.otherwise {
      stateNext := s_tlb_req // more elements to process
    }

  }.elsewhen(state === s_send_data) { // advance only when the sbuffer accepts the data
    when(!io.sbuffer.fire) {
      stateNext := s_send_data
    }.elsewhen((segmentIdx === maxSegIdx) && (fieldIdx === maxNfields)) {
      stateNext := s_finish // segment instruction finishes
    }.otherwise {
      stateNext := s_tlb_req // more elements to process
    }
  }.elsewhen(state === s_finish){ // writeback uop
    stateNext := Mux(distanceBetween(enqPtr, deqPtr) === 0.U, s_idle, s_finish)

  }.otherwise{
    stateNext := s_idle
    XSError(true.B, s"Unknown state!\n")
  }
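
  // Example FSM trace (illustrative) for one element of a segment load that
  // hits in both the dtlb and the dcache, assuming the sbuffer is already empty:
  //   s_idle -> s_flush_sbuffer_req -> s_tlb_req -> s_wait_tlb_resp -> s_pm
  //   -> s_cache_req -> s_cache_resp -> s_latch_and_merge_data -> s_tlb_req -> ...
  // and, once the last (segmentIdx, fieldIdx) element has been merged:
  //   s_latch_and_merge_data -> s_finish -> s_idle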
  /*************************************************************************
   *                          enqueue logic
   *************************************************************************/
  io.in.ready := true.B
  val fuOpType = io.in.bits.uop.fuOpType
  val vtype = io.in.bits.uop.vpu.vtype
  val mop = fuOpType(6, 5)
  val instType = Cat(true.B, mop)
  val eew = io.in.bits.uop.vpu.veew
  val sew = vtype.vsew
  val lmul = vtype.vlmul
  val vl = instMicroOp.vl
  val vm = instMicroOp.uop.vpu.vm
  val vstart = instMicroOp.uop.vpu.vstart
  val srcMask = GenFlowMask(Mux(vm, Fill(VLEN, 1.U(1.W)), io.in.bits.src_mask), vstart, vl, true)
  // on the first uop's enqueue, latch the micro-op of the whole segment instruction
  when(io.in.fire && !instMicroOp.valid){
    val vlmaxInVd = GenVLMAX(Mux(lmul.asSInt > 0.S, 0.U, lmul), Mux(isIndexed(instType), sew(1, 0), eew(1, 0))) // number of elements in a vd
    instMicroOp.vaddr := io.in.bits.src_rs1(VAddrBits - 1, 0)
    instMicroOp.valid := true.B // this is the first uop
    instMicroOp.alignedType := Mux(isIndexed(instType), sew(1, 0), eew(1, 0))
    instMicroOp.uop := io.in.bits.uop
    instMicroOp.mask := srcMask
    instMicroOp.vstart := 0.U
    instMicroOp.vlmaxInVd := vlmaxInVd
    instMicroOp.vlmaxMaskInVd := UIntToMask(vlmaxInVd, elemIdxBits) // for merging data
    instMicroOp.vl := io.in.bits.src_vl.asTypeOf(VConfig()).vl
    segmentOffset := 0.U
    fieldIdx := 0.U
    segmentIdx := 0.U // restart element traversal for the new instruction (see the segmentIdx update below)
  }
  // latch data
  when(io.in.fire){
    data(enqPtr.value) := io.in.bits.src_vs3
    stride(enqPtr.value) := io.in.bits.src_stride
    uopIdx(enqPtr.value) := io.in.bits.uop.vpu.vuopIdx
  }

  // update enqPtr, only 1 enqueue port
  when(io.in.fire){
    enqPtr := enqPtr + 1.U
  }

  /*************************************************************************
   *                          output logic
   *************************************************************************/
  // MicroOp
  val baseVaddr              = instMicroOp.vaddr
  val alignedType            = instMicroOp.alignedType
  val fuType                 = instMicroOp.uop.fuType
  val mask                   = instMicroOp.mask
  val exceptionVec           = instMicroOp.uop.exceptionVec
  val issueEew               = instMicroOp.uop.vpu.veew
  val issueLmul              = instMicroOp.uop.vpu.vtype.vlmul
  val issueSew               = instMicroOp.uop.vpu.vtype.vsew
  val issueEmul              = EewLog2(issueEew) - issueSew + issueLmul
  val elemIdxInVd            = segmentIdx & instMicroOp.vlmaxMaskInVd
  val issueInstType          = Cat(true.B, instMicroOp.uop.fuOpType(6, 5)) // always a segment instruction
  val issueVLMAXLog2         = GenVLMAXLog2(
    Mux(issueLmul.asSInt > 0.S, 0.U, issueLmul),
    Mux(isIndexed(issueInstType), issueSew(1, 0), issueEew(1, 0))
  ) // log2 of the max number of elements in a vd
  val issueVlMax             = instMicroOp.vlmaxInVd // max elementIdx in a vd
  val issueMaxIdxInIndex     = GenVLMAX(Mux(issueEmul.asSInt > 0.S, 0.U, issueEmul), issueEew) // max number of index elements in one index register
  val issueMaxIdxInIndexMask = UIntToMask(issueMaxIdxInIndex, elemIdxBits)
  val issueMaxIdxInIndexLog2 = GenVLMAXLog2(Mux(issueEmul.asSInt > 0.S, 0.U, issueEmul), issueEew)
  val issueIndexIdx          = segmentIdx & issueMaxIdxInIndexMask

  val indexStride = IndexAddr( // index offset for indexed instructions
    index = stride(stridePtr.value),
    flow_inner_idx = issueIndexIdx,
    eew = issueEew
  )
  val realSegmentOffset = Mux(isIndexed(issueInstType),
                              indexStride,
                              segmentOffset)
  val vaddr = baseVaddr + (fieldIdx << alignedType).asUInt + realSegmentOffset
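
  // Worked address example (illustrative): for a unit-stride vlseg3e8.v with
  // base address 0x1000, NFIELDS = 3 and eew = 8b, the generated vaddrs are
  //   segment 0: 0x1000, 0x1001, 0x1002
  //   segment 1: 0x1003, 0x1004, 0x1005
  // since vaddr = baseVaddr + (fieldIdx << alignedType) + segmentOffset, and
  // segmentOffset advances by (maxNfields + 1) << issueEew per segment (see below).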
  /**
   * tlb req and tlb resp
   */

  // DTLB request assignment
  io.dtlb.req                         := DontCare
  io.dtlb.resp.ready                  := true.B
  io.dtlb.req.valid                   := state === s_tlb_req
  io.dtlb.req.bits.cmd                := Mux(FuType.isVLoad(fuType), TlbCmd.read, TlbCmd.write)
  io.dtlb.req.bits.vaddr              := vaddr
  io.dtlb.req.bits.size               := instMicroOp.alignedType(2, 0)
  io.dtlb.req.bits.memidx.is_ld       := FuType.isVLoad(fuType)
  io.dtlb.req.bits.memidx.is_st       := FuType.isVStore(fuType)
  io.dtlb.req.bits.debug.robIdx       := instMicroOp.uop.robIdx
  io.dtlb.req.bits.no_translate       := false.B
  io.dtlb.req.bits.debug.pc           := instMicroOp.uop.pc
  io.dtlb.req.bits.debug.isFirstIssue := DontCare
  io.dtlb.req_kill                    := false.B

  // tlb resp
  when(io.dtlb.resp.fire && state === s_wait_tlb_resp){
    exceptionVec(storePageFault)   := io.dtlb.resp.bits.excp(0).pf.st
    exceptionVec(loadPageFault)    := io.dtlb.resp.bits.excp(0).pf.ld
    exceptionVec(storeAccessFault) := io.dtlb.resp.bits.excp(0).af.st
    exceptionVec(loadAccessFault)  := io.dtlb.resp.bits.excp(0).af.ld
    when(!io.dtlb.resp.bits.miss){
      instMicroOp.paddr := io.dtlb.resp.bits.paddr(0)
    }
  }
  // pmp
  // NOTE: only load/store exceptions are handled here; other exceptions are never sent to this unit
  val pmp = WireInit(io.pmpResp)
  when(state === s_pm){
    exception_va := exceptionVec(storePageFault) || exceptionVec(loadPageFault) ||
                    exceptionVec(storeAccessFault) || exceptionVec(loadAccessFault)
    exception_pa := pmp.st || pmp.ld

    instMicroOp.exception_pa := exception_pa
    instMicroOp.exception_va := exception_va
    // update access fault bits with the pmp result
    exceptionVec(loadAccessFault)  := exceptionVec(loadAccessFault) || pmp.ld
    exceptionVec(storeAccessFault) := exceptionVec(storeAccessFault) || pmp.st

    // record the faulting element only when an exception actually occurs, so
    // that vl (and thus maxSegIdx) is not clobbered on every element
    when(exception_va || exception_pa){
      instMicroOp.exceptionvaddr := vaddr
      instMicroOp.vl     := segmentIdx // for exception: trim vl
      instMicroOp.vstart := segmentIdx // for exception: record the faulting element
    }
  }

  /**
   * flush sbuffer IO assignment
   */
  io.flush_sbuffer.valid := !sbufferEmpty && (state === s_flush_sbuffer_req)


  /**
   * merge data for load
   */
  val cacheData = io.rdcache.resp.bits.data
  val pickData  = rdataVecHelper(alignedType(1, 0), cacheData)
  val mergedData = mergeDataWithElemIdx(
    oldData     = data(splitPtr.value),
    newData     = Seq(pickData),
    alignedType = alignedType(1, 0),
    elemIdx     = Seq(elemIdxInVd),
    valids      = Seq(true.B)
  )
  when(state === s_latch_and_merge_data){
    data(splitPtr.value) := mergedData
  }
  /**
   * split data for store
   * */
  val splitData = genVSData(
    data        = data(splitPtr.value),
    elemIdx     = elemIdxInVd,
    alignedType = alignedType
  )
  val flowData = genVWdata(splitData, alignedType) // TODO: connect vstd, pass vector data
  val wmask = genVWmask(vaddr, alignedType(1, 0)) & mask(segmentIdx)

  /**
   * rdcache req
   */
  io.rdcache.req                 := DontCare
  io.rdcache.req.valid           := state === s_cache_req && FuType.isVLoad(fuType)
  io.rdcache.req.bits.cmd        := MemoryOpConstants.M_XRD
  io.rdcache.req.bits.vaddr      := vaddr
  io.rdcache.req.bits.mask       := mask
  io.rdcache.req.bits.data       := flowData
  io.rdcache.pf_source           := LOAD_SOURCE.U
  io.rdcache.req.bits.id         := DontCare
  io.rdcache.resp.ready          := true.B
  io.rdcache.s1_paddr_dup_lsu    := instMicroOp.paddr
  io.rdcache.s1_paddr_dup_dcache := instMicroOp.paddr
  io.rdcache.s1_kill             := false.B
  io.rdcache.s2_kill             := false.B
  if (env.FPGAPlatform){
    io.rdcache.s0_pc             := DontCare
    io.rdcache.s1_pc             := DontCare
    io.rdcache.s2_pc             := DontCare
  }else{
    io.rdcache.s0_pc             := instMicroOp.uop.pc
    io.rdcache.s1_pc             := instMicroOp.uop.pc
    io.rdcache.s2_pc             := instMicroOp.uop.pc
  }
  io.rdcache.replacementUpdated  := false.B
  io.rdcache.is128Req            := false.B

  /**
   * wdcache req
   * */
  io.wdcache.req       := DontCare
  io.wdcache.req.valid := state === s_cache_req && FuType.isVStore(fuType)
  io.wdcache.req.bits.cmd   := MemoryOpConstants.M_PFW
  io.wdcache.req.bits.vaddr := vaddr
  io.wdcache.resp.ready := true.B
  io.wdcache.s1_paddr  := instMicroOp.paddr
  io.wdcache.s1_kill   := false.B
  io.wdcache.s2_kill   := false.B
  io.wdcache.s2_pc     := instMicroOp.uop.pc
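
  // Store-path note: stores never write the dcache directly from this unit.
  // The wdcache request above only probes the line with M_PFW (prefetch with
  // intent to write); the element data itself is committed through the sbuffer
  // below, which is why the FSM waits for io.sbuffer.fire in s_send_data.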
  /**
   * write data to sbuffer
   * */

  io.sbuffer.bits       := DontCare
  io.sbuffer.valid      := state === s_send_data
  io.sbuffer.bits.mask  := wmask
  io.sbuffer.bits.data  := flowData
  io.sbuffer.bits.vaddr := vaddr
  io.sbuffer.bits.cmd   := MemoryOpConstants.M_XWR
  io.sbuffer.bits.id    := DontCare
  io.sbuffer.bits.addr  := instMicroOp.paddr

  /**
   * update ptr
   * */

  // one field element completes: merged into vd for loads (s_latch_and_merge_data),
  // or accepted by the sbuffer for stores (assumed: the store path must advance
  // on io.sbuffer.fire, mirroring the s_send_data transition above)
  val fieldDone = (state === s_latch_and_merge_data) || (state === s_send_data && io.sbuffer.fire)

  val splitPtrOffset = Mux(lmul.asSInt < 0.S, 1.U, (1.U << lmul).asUInt)
  splitPtrNext := PriorityMux(Seq(
    ((fieldIdx === maxNfields) && (elemIdxInVd === (issueVlMax - 1.U))) -> (deqPtr + // segment finishes and the next register in the group is needed
      (segmentIdx >> issueVLMAXLog2).asUInt),
    (fieldIdx === maxNfields) -> deqPtr,                     // segment finishes
    true.B                    -> (splitPtr + splitPtrOffset) // next field
  ))

  // update splitPtr
  when(fieldDone){
    splitPtr := splitPtrNext
  }.elsewhen(io.in.fire && !instMicroOp.valid){
    splitPtr := deqPtr // initial splitPtr
  }

  // update stridePtr, only used for indexed accesses
  val strideOffset = Mux(isIndexed(issueInstType), segmentIdx >> issueMaxIdxInIndexLog2, 0.U)
  stridePtr := deqPtr + strideOffset

  // update fieldIdx
  when(fieldIdx === maxNfields && fieldDone){ // all fields of the current segment are done
    fieldIdx := 0.U
  }.elsewhen(fieldDone){
    fieldIdx := fieldIdx + 1.U
  }
  // update segmentIdx: move to the next segment once the last field of the
  // current segment completes; the guard keeps segmentIdx at maxSegIdx on the
  // final element so the XSError bound above is never violated
  when(fieldIdx === maxNfields && fieldDone && segmentIdx =/= maxSegIdx){
    segmentIdx := segmentIdx + 1.U
  }
  // update segmentOffset
  when(fieldIdx === maxNfields && fieldDone){
    segmentOffset := segmentOffset + Mux(isUnitStride(issueInstType), (maxNfields +& 1.U) << issueEew, stride(stridePtr.value))
  }

  // update deqPtr
  when(io.uopwriteback.fire){
    deqPtr := deqPtr + 1.U
  }

  /*************************************************************************
   *                          dequeue logic
   *************************************************************************/
  when(stateNext === s_idle){
    instMicroOp.valid := false.B
  }
  io.uopwriteback.valid                 := state === s_finish
  io.uopwriteback.bits.uop              := instMicroOp.uop
  io.uopwriteback.bits.mask.get         := instMicroOp.mask
  io.uopwriteback.bits.data             := data(deqPtr.value)
  io.uopwriteback.bits.vdIdx.get        := uopIdx(deqPtr.value)
  io.uopwriteback.bits.uop.vpu.vl       := instMicroOp.vl
  io.uopwriteback.bits.uop.vpu.vstart   := instMicroOp.vstart
  io.uopwriteback.bits.debug            := DontCare
  io.uopwriteback.bits.vdIdxInField.get := DontCare

  // to RS
  io.feedback.valid                 := state === s_finish
  io.feedback.bits.hit              := true.B
  io.feedback.bits.robIdx           := instMicroOp.uop.robIdx
  io.feedback.bits.sourceType       := DontCare
  io.feedback.bits.flushState       := DontCare
  io.feedback.bits.dataInvalidSqIdx := DontCare
  io.feedback.bits.uopIdx.get       := uopIdx(deqPtr.value)

  // exception
  io.exceptionAddr := DontCare // TODO: fix when exception handling is implemented
}